For manuscript “Development and validation of predictive models of early immune effector cell-associated hematotoxicity (eIPMs)” by Liang et al., 2024
library(openxlsx)
library(here)
library(janitor)
library(tidyverse)
library(labelled)
library(zoo)
library(gtsummary)
library(prodlim)
library(ggsurvfit)
library(ggplot2)
library(scales)
library(plotly)
library(ggrepel)
library(patchwork)
library(pmsampsize)
library(caret)
library(rms)
library(tidymodels)
library(glmnet)
library(probably)
library(vip)
library(runway)
library(cutpointr)
library(dcurves)
theme_set(theme_bw())
theme_gtsummary_compact()
# Clinical data
df <-
read.xlsx(
here(
"dfs",
"EPIC Data Export",
"2024.05.29.PatientDemographics for Supplement.xlsx"
),
detectDates = TRUE
)
df <- clean_names(df)
df <- df %>%
filter(cart_num == 1)
# CBC data
df_cbc <-
read.xlsx(here("dfs", "EPIC Data Export", "2024.05.29.hems.xlsx"),
detectDates = TRUE)
df_cbc <- clean_names(df_cbc)
df_cbc <- df_cbc %>%
filter(cart_num == 1)
# Fill in missing death dates in df from LTFU datasets
df_death_LTFU <-
read.xlsx(
here(
"dfs",
"Clinical Datasets",
"IMTXLTFU-10039_Cause_of_Death_230926.xlsx"
),
detectDates = TRUE
) %>%
clean_names()
df_LTFU_additional <-
read.xlsx(
here(
"dfs",
"Clinical Datasets",
"Death and Progression Updates LTFU 240313.xlsx"
),
detectDates = TRUE
) %>%
clean_names()
df_death_LTFU <- df_death_LTFU %>%
separate(col = upn,
into = c("initials", "upn"),
sep = " ") %>%
select(upn, date_of_death, cause_of_death) %>%
mutate(date_of_death = as.Date(date_of_death)) %>%
rename(deathdat = date_of_death)
df <- rows_patch(df,
df_death_LTFU %>% select(upn, deathdat),
by = "upn",
unmatched = "ignore")
df <- rows_patch(
df,
df_LTFU_additional %>% select(upn, deathdat),
by = "upn",
unmatched = "ignore"
)
# Make the date of last follow-up the latest of: 1) date of death, 2) date of last follow-up from LTFU, 3) date of last follow-up from EMR
df_LTFU <-
read.xlsx(here("dfs", "Clinical Datasets", "LTFU 240529.xlsx"),
detectDates = TRUE) %>%
clean_names()
df_fu_LTFU <- df_LTFU %>%
select(upn, date_of_latest_progress_clinic_note) %>%
filter(!is.na(date_of_latest_progress_clinic_note)) %>%
drop_na() %>%
group_by(upn) %>%
dplyr::summarize(date_of_latest_progress_clinic_note = max(date_of_latest_progress_clinic_note))
df <- df %>%
left_join(., df_fu_LTFU, by = "upn") %>%
rowwise() %>%
mutate(datelast =
max(
deathdat,
date_of_latest_progress_clinic_note,
datelast,
na.rm = TRUE
))
# Exclude patients who died prior to day +14
df <- df %>%
filter(deathdat - cart_date >= 14 | is.na(deathdat))
# Create data frame with longitudinal ANC values
# If last lab draw occurs after 23:00, assign ANC value to the following day
df_ancx <- df_cbc %>%
filter(upn %in% df$upn) %>%
select(upn, cart_date, date, time, ancx) %>%
mutate(time = as.POSIXct(time, format = "%H:%M:%S"),
new_date = as.Date(ifelse(
time >= as.POSIXct("23:00:00", format = "%H:%M:%S"),
date + days(1),
date
))) %>%
ungroup()
df_ancx <- df_ancx %>%
select(-date, -time) %>%
rename(date = new_date)
df_ancx <- df_ancx %>%
mutate(time_post_inf = as.numeric(date - cart_date)) %>%
filter(time_post_inf %in% 0:30) %>%
distinct(upn, time_post_inf, ancx, .keep_all = TRUE) %>% # Keep unique combinations of upn, time_post_inf, and anc
group_by(upn, time_post_inf) %>%
slice_min(ancx) %>% # For each combination of upn and time_post_inf, keep the lowest ANC value
ungroup()
# Fill in missing time_post_inf and replicate UPNs by first creating df_extract
df_extract <- df %>%
distinct(upn, cart_date, datelast)
# Create replicate_ids() to replicate UPNs according to duration of follow-up
replicate_ids <- function(upn, cart_date, datelast) {
if (is.na(datelast) || (datelast - cart_date) >= 30) {
return(replicate(31, upn))
} else {
return(replicate(datelast - cart_date + 1, upn)) # Add 1 to account for day +0
}
}
# Create df_id with replicated UPNs
df_id <- df_extract %>%
dplyr::rowwise() %>%
do(data.frame(upn = replicate_ids(.$upn, .$cart_date, .$datelast))) %>%
group_by(upn) %>%
mutate(time_post_inf = row_number() - 1) %>%
ungroup()
# Merge df_id with df
df_ancx <- df_ancx %>%
group_by(upn) %>%
merge(.,
df_id,
by = c("upn", "time_post_inf"),
all.y = TRUE) %>%
ungroup()
# Fill in cart_date and date columns
df_ancx <- df_ancx %>%
group_by(upn) %>%
mutate(cart_date = as.Date(ifelse(
is.na(cart_date), max(cart_date, na.rm = TRUE), cart_date
)),
date = as.Date(cart_date + time_post_inf)) %>%
ungroup()
# Linearly interpolate missing ANC values for up to 7 consecutive NAs
df_ancx <- df_ancx %>%
arrange(upn, time_post_inf) %>%
group_by(upn) %>%
mutate(ancx = na.approx(ancx, maxgap = 7, rule = 2)) %>%
ungroup()
# Round ANC values to nearest integer divisible by 10 (like real ANC values)
df_ancx <- df_ancx %>%
mutate(ancx = round(ancx/10) * 10)
# Exclude those with missing ANCs after interpolation
df_ancx <- df_ancx %>%
group_by(upn) %>%
mutate(sumNA = sum(is.na(ancx))) %>%
filter(sumNA == 0) %>%
ungroup()
# Add UWIDs
df_ancx <- df_ancx %>%
inner_join(., df %>% select(upn, uwid), by = "upn") %>%
select(upn, uwid, cart_date, date, time_post_inf, ancx)
# Log10-transform ANC and select variables for clustering
df_anc <- df_ancx %>%
mutate(anc = ifelse(ancx == 0, 0.01, ancx),
anc = log10(anc)) %>%
select(upn, time_post_inf, anc)
var_label(df_anc$time_post_inf) <- "Days after CAR T-cell infusion"
var_label(df_anc$anc) <- "ANC"
# For eICAHT threshold of ANC ≤ 500 cells/μL
df_exceedances_below_501 <-
# ddply splits a data frame, applies a function, and combines results into a data frame
plyr::ddply(.data = df_ancx, .variables = c("upn", "uwid"), .fun = function(data) {
heatwaveR::exceedance(
data,
x = date,
y = ancx,
threshold = 501,
below = TRUE, # Whether to detect events below the threshold,
minDuration = 1, # Minimum duration for acceptance of detected events
joinAcrossGaps = TRUE, # Whether to join events which occur before/after gap
maxGap = 2 # Maximum length of gap allowed for joining of events
)$exceedance
})
# Create data frame containing maximum duration of events for each UPN
df_anc_500 <- df_exceedances_below_501 %>%
select(upn, uwid, exceedance_no, duration) %>%
group_by(upn, uwid) %>%
dplyr::summarize(duration_below_501_max = max(duration)) %>%
mutate(duration_below_501_max = ifelse( # Replace NAs with 0 (never below threshold)
is.na(duration_below_501_max),
0,
duration_below_501_max
))
# For eICAHT threshold of ANC ≤ 100 cells/μL
df_exceedances_below_101 <-
plyr::ddply(.data = df_ancx, .variables = c("upn"), .fun = function(data) {
heatwaveR::exceedance(
data,
x = date,
y = ancx,
threshold = 101,
below = TRUE,
minDuration = 1,
joinAcrossGaps = TRUE,
maxGap = 2,
maxPadLength = FALSE
)$exceedance
})
df_anc_100 <- df_exceedances_below_101 %>%
select(upn, exceedance_no, duration) %>%
group_by(upn) %>%
dplyr::summarize(duration_below_101_max = max(duration)) %>%
mutate(duration_below_101_max = ifelse(
is.na(duration_below_101_max),
0,
duration_below_101_max
))
# Create data frame with exceedances below each threshold for each UPN
df_icaht <- df_anc_500 %>%
left_join(., df_anc_100, by = "upn") %>%
ungroup()
# Create column with ICAHT grades
df_icaht <- df_icaht %>%
mutate(
icaht_grade = case_when(
duration_below_501_max == 0 & duration_below_101_max == 0 ~ "Grade 0",
duration_below_501_max %in% 1:6 & duration_below_101_max < 7 ~ "Grade 1",
duration_below_501_max %in% 7:13 & duration_below_101_max < 7 ~ "Grade 2",
(duration_below_501_max %in% 14:30 & duration_below_101_max < 7) |
(duration_below_501_max < 31 & duration_below_101_max %in% 7:13) ~ "Grade 3",
(duration_below_501_max >= 31 & duration_below_101_max < 14) | duration_below_101_max >= 14 ~ "Grade 4"
)
)
df_exceedances_below_501 <- df_exceedances_below_501 %>%
filter(!is.na(exceedance_no)) %>% # Filter out patients who never had neutropenia
select(upn,
exceedance_below_501_no = exceedance_no,
exceedance_below_501_date_start = date_start,
exceedance_below_501_date_end = date_end) %>%
mutate(exceedance_below_501_date_start = as.Date(exceedance_below_501_date_start), # Convert dates to class "Date"
exceedance_below_501_date_end = as.Date(exceedance_below_501_date_end))
# Add date of last available ANC value
df_last_time_post_inf <- df_ancx %>%
arrange(upn, time_post_inf) %>%
group_by(upn) %>%
slice_tail(n = 1) %>% # Select last row
ungroup() %>%
select(upn,
cart_date,
last_lab_date = date)
df_exceedances_below_501 <- df_exceedances_below_501 %>%
left_join(., df_last_time_post_inf, by = "upn")
# Classify patients who had exceedances below ANC ≤ 500 cells/μL
# starting between days 0-3 and ending on "last_lab_date" as grade 4 early ICAHT
df_exceedances_below_501 <- df_exceedances_below_501 %>%
mutate(
icaht_grade_4 = ifelse(
exceedance_below_501_date_start - cart_date <= 3 &
exceedance_below_501_date_end == last_lab_date,
"Yes",
"No"
)
)
df_icaht <- df_icaht %>%
left_join(.,
df_exceedances_below_501 %>% select(upn, icaht_grade_4),
by = "upn")
df_icaht <- df_icaht %>%
mutate(
icaht_grade = dplyr::if_else(
icaht_grade_4 == "Yes",
"Grade 4",
icaht_grade,
missing = icaht_grade
)
) %>%
distinct(upn,
icaht_grade,
.keep_all = TRUE)
# Data frame contains automated grades assigned by {heatwaveR} approach, MSKCC manual code, and manual grades for discrepancies
df_check <-
read.xlsx(here("dfs",
"Manual vs. Automated Early ICAHT Grades.xlsx")) %>%
clean_names()
# Filter to cases incorrectly graded by {heatwaveR} approach
df_check <- df_check %>%
filter(center == "FHCC",
disagreement_manual_heatwave_r == 1) %>%
select(upn = patient_id,
manual_icaht_grade) %>%
mutate(manual_icaht_grade = paste0("Grade ", manual_icaht_grade))
# Replace grades in df_icaht with correct grades
df_icaht <- df_icaht %>%
left_join(., df_check, by = "upn")
df_icaht <- df_icaht %>%
mutate(icaht_grade = ifelse(!is.na(manual_icaht_grade), manual_icaht_grade, icaht_grade)) %>%
select(-icaht_grade_4, -manual_icaht_grade)
# Add ICAHT grades to df
df <- df %>%
filter(upn %in% df_icaht$upn) %>%
left_join(., df_icaht %>% select(upn, icaht_grade), by = "upn")
df <- df %>%
mutate(icaht_grade_num = case_when(icaht_grade == "Grade 0" ~ 0,
icaht_grade == "Grade 1" ~ 1,
icaht_grade == "Grade 2" ~ 2,
icaht_grade == "Grade 3" ~ 3,
icaht_grade == "Grade 4" ~ 4))
df <- df %>%
mutate(
disease_cat =
factor(disease_cat,
levels = c("Indolent NHL", "Aggressive NHL", "ALL", "MM/PCL")),
product_cat = as.factor(product_cat),
car_target = case_when(
product_cat == "Commercial CD19 CAR T-cell product with CD28 costimulatory domain (axi-cel, brexu-cel)" |
product_cat == "Commercial CD19 CAR T-cell product with 4-1BB costimulatory domain (liso-cel, tisa-cel)" |
product_infused == "JCAR014" |
product_infused == "JCAR021" ~ "CD19",
product_infused == "Investigational CD20 product" ~ "CD20",
.default = "BCMA"
),
car_target = factor(car_target,
levels = c("BCMA", "CD19", "CD20")),
racenih_cat = ifelse(racenih == "White", "White", "Non-White"),
racenih_cat = fct_relevel(as.factor(racenih_cat), "White"),
ethnih_cat = ifelse(
ethnih == "Hispanic or Latino",
"Hispanic or Latino",
"Not Hispanic/Latino or Unknown"
),
ethnih = fct_relevel(as.factor(ethnih_cat), "Not Hispanic/Latino or Unknown"),
icaht_grade = fct_relevel(icaht_grade, "Grade 0"),
icaht_grade_3_4 = ifelse(icaht_grade_num %in% 3:4, 1, 0),
prior_hct = ifelse(!is.na(txdate1) &
txdate1 < cart_date, "Yes", "No")
)
df <- df %>%
select(
upn,
cart_num,
cart_date,
cart_age,
gender_desc,
racenih,
racenih_cat,
ethnih,
ethnih_cat,
disease_cat,
prior_hct,
product_cat,
product_infused,
car_target,
costim_domain,
datelast,
deathdat,
icaht_grade,
icaht_grade_num,
icaht_grade_3_4
)
df_tox <-
read.xlsx(here("dfs", "CAR Toxicity Data 240529.xlsx"),
detectDates = TRUE) %>%
clean_names()
df <- df %>%
left_join(., df_tox %>% select(upn, crs_grade, icans_grade), by = "upn")
var_label(df$cart_age) <- "Age"
var_label(df$gender_desc) <- "Sex"
var_label(df$racenih) <- "Race"
var_label(df$racenih_cat) <- "Race"
var_label(df$ethnih) <- "Ethnicity"
var_label(df$ethnih_cat) <- "Ethnicity"
var_label(df$disease_cat) <- "Disease"
var_label(df$prior_hct) <- "Prior HCT"
var_label(df$product_cat) <- "CAR T-cell product category"
var_label(df$product_infused) <- "CAR T-cell product"
var_label(df$car_target) <- "CAR target"
var_label(df$costim_domain) <- "Costimulatory domain"
var_label(df$crs_grade) <- "CRS grade"
var_label(df$icans_grade) <- "ICANS grade"
var_label(df$icaht_grade) <- "ICAHT grade"
# Regimen
df_LD <-
read.xlsx(
here(
"dfs",
"EPIC Data Export",
"2024.05.29.lymphodepletionRegimen.xlsx"
),
detectDates = TRUE
) %>%
clean_names()
df_LD <- df_LD %>% filter(cart_num == 1)
df_LD <- df_LD %>%
mutate(LD_regimen = case_when(
grepl("cyclophosphamide", dep_regimen, ignore.case = TRUE) &
grepl("fludarabine", dep_regimen, ignore.case = TRUE) ~ "Cy/Flu",
.default = "Other"
))
# Dose
df_LD_dose <-
read.xlsx(here("dfs",
"EPIC Data Export",
"2024.05.29.treatmentNotes.xlsx"),
detectDates = TRUE) %>%
clean_names()
df_LD_dose <- df_LD_dose %>% filter(cart_num == 1)
df_LD_dose <- df_LD_dose %>%
group_by(upn) %>%
mutate(LD_dose = paste(condition_notes, collapse = " ")) %>%
slice_head(n = 1)
# Join data frames
df_LD <- df_LD %>%
left_join(df_LD_dose %>% select(upn, LD_dose), by = "upn")
# Categorize lymphodepletion regimens
# Define high-intensity Cy/Flu regimens
high_cy_flu <-
c(
"Cyclophosphamide 500 mg/msq/day",
"Cyclophosphamide 60 mg/kg x 1 dose",
"Cyclophosphamide 60 mg/kg IV x 1 dose",
"Cyclophosphamide 30 mg/kg x 1 dose"
)
df_LD <- df_LD %>%
mutate(
LD_regimen_cat = case_when(
LD_regimen == "Cy/Flu" &
grepl(paste(high_cy_flu, collapse = "|"), LD_dose, ignore.case = TRUE) ~ "High-intensity Cy/Flu",
LD_regimen == "Cy/Flu" ~ "Low-intensity Cy/Flu",
.default = "Other"
)
)
# Add LD regimen data to df
df <- df %>%
left_join(., df_LD %>% select(upn, LD_regimen_cat), by = "upn")
df <- df %>%
mutate(
LD_regimen_cat = fct_relevel(
LD_regimen_cat,
c("Low-intensity Cy/Flu", "High-intensity Cy/Flu", "Other")
),
LD_regimen_low_CyFlu_vs_other = ifelse(
LD_regimen_cat == "Low-intensity Cy/Flu",
"Low-intensity Cy/Flu",
"Other"
),
LD_regimen_low_CyFlu_vs_other = factor(
LD_regimen_low_CyFlu_vs_other,
levels = c("Low-intensity Cy/Flu", "Other")
)
)
var_label(df$LD_regimen_cat) <- "Lymphodepletion regimen"
var_label(df$LD_regimen_low_CyFlu_vs_other) <- "Lymphodepletion regimen (low-dose Cy/Flu vs. other)"
# If last CBC draw occurs after 23:00, assign value to the following day
df_cbc <- df_cbc %>%
mutate(time = as.POSIXct(time, format = "%H:%M:%S"),
date = as.Date(ifelse(
time >= as.POSIXct("23:00:00", format = "%H:%M:%S"),
date + 1,
date
)),
day = as.numeric(date - cart_date)) %>%
ungroup()
# Other labs
df_labs <-
read.xlsx(here("dfs", "EPIC Data Export", "2024.05.29.labs.xlsx"),
detectDates = TRUE) %>%
clean_names()
df_labs <- df_labs %>%
filter(cart_num == 1)
df_labs <- df_labs %>%
mutate(time = as.POSIXct(time, format = "%H:%M:%S"),
new_date = as.Date(ifelse(
time >= as.POSIXct("23:00:00", format = "%H:%M:%S"),
date + 1,
date
)),
new_day = as.numeric(new_date - cart_date)) %>%
ungroup()
# Pre-lymphodepletion labs
df_pre_ld <-
read.xlsx(
here(
"dfs",
"EPIC Data Export",
"2024.05.29.prelymphodepletionLabsHems.xlsx"
),
detectDates = TRUE
) %>%
clean_names()
df_pre_ld <- df_pre_ld %>%
filter(cart_num == 1)
## Add pre-LD ANC, ALC, Hb, platelet count, and LDH to df
df <- df_pre_ld %>%
mutate(
anc_pre_ld = ancx_pre_d / 1000,
lym_pre_ld = lymx_pre_d / 1000,
hb_pre_ld = hgb_amt_pre_d,
plt_pre_ld = tot_platelet_cnt_pre_d / 1000
) %>%
select(upn, anc_pre_ld, lym_pre_ld, hb_pre_ld, plt_pre_ld, ldh_pre_ld = ldh_u_l_pre_d) %>%
left_join(df, ., by = "upn")
var_label(df$anc_pre_ld) <- "Pre-LD ANC"
var_label(df$hb_pre_ld) <- "Pre-LD Hb"
var_label(df$plt_pre_ld) <- "Pre-LD platelet count"
# Add day +0 LDH to df
# If there are multiple day +0 values, use the earliest one (corresponding to pre-infusion value)
df <- df_labs %>%
filter(day == 0, !is.na(ldh_u_l)) %>%
select(upn, day, time, ldh_u_l) %>%
group_by(upn) %>%
filter(time == min(time)) %>%
select(upn, ldh_u_l) %>%
rename(ldh_day_0 = ldh_u_l) %>%
left_join(df, ., by = "upn")
# Add pre-LD, day +0, day +3, day +5, day +7, and peak CRP to df (where peak is between days +0 and +30)
# For day +3, use values between days +2 through +4
# If there are multiple day +0 values, use the earliest one (corresponding to pre-infusion value)
# For the other timepoints: if there are multiple values, use the peak/nadir value (depending on the laboratory test of interest)
df <- df_pre_ld %>%
select(upn, crp_mg_l_pre_d) %>%
rename(crp_pre_ld = crp_mg_l_pre_d) %>%
left_join(df, ., by = "upn")
df <- df_labs %>%
filter(day == 0, !is.na(crp_mg_l)) %>%
select(upn, day, time, crp_mg_l) %>%
group_by(upn) %>%
filter(time == min(time)) %>%
select(upn, crp_mg_l) %>%
rename(crp_day_0 = crp_mg_l) %>%
left_join(df, ., by = "upn")
df <- df_labs %>%
filter(day %in% 2:4, !is.na(crp_mg_l)) %>%
select(upn, day, crp_mg_l) %>%
group_by(upn) %>%
dplyr::summarize(crp_day_3 = max(crp_mg_l)) %>%
select(upn, crp_day_3) %>%
left_join(df, ., by = "upn")
df <- df_labs %>%
filter(day == 5, !is.na(crp_mg_l)) %>%
select(upn, day, crp_mg_l) %>%
group_by(upn) %>%
dplyr::summarize(crp_day_5 = max(crp_mg_l)) %>%
select(upn, crp_day_5) %>%
left_join(df, ., by = "upn")
df <- df_labs %>%
filter(day == 7, !is.na(crp_mg_l)) %>%
select(upn, day, crp_mg_l) %>%
group_by(upn) %>%
dplyr::summarize(crp_day_7 = max(crp_mg_l)) %>%
select(upn, crp_day_7) %>%
left_join(df, ., by = "upn")
df <- df_labs %>%
filter(day %in% 0:30, !is.na(crp_mg_l)) %>%
select(upn,
crp_mg_l) %>%
group_by(upn) %>%
dplyr::summarize(crp_max = max(crp_mg_l)) %>%
left_join(df, ., by = "upn")
# Add pre-LD, day +0, day +3, day +5, day +7, and peak ferritin to df
df <- df_pre_ld %>%
select(upn, ferritin_ng_ml_pre_d) %>%
rename(ferritin_pre_ld = ferritin_ng_ml_pre_d) %>%
left_join(df, ., by = "upn")
df <- df_labs %>%
filter(day == 0, !is.na(ferritin_ng_ml)) %>%
select(upn, day, time, ferritin_ng_ml) %>%
group_by(upn) %>%
filter(time == min(time)) %>%
select(upn, ferritin_ng_ml) %>%
rename(ferritin_day_0 = ferritin_ng_ml) %>%
left_join(df, ., by = "upn")
df <- df_labs %>%
filter(day %in% 2:4, !is.na(ferritin_ng_ml)) %>%
select(upn, day, ferritin_ng_ml) %>%
group_by(upn) %>%
dplyr::summarize(ferritin_day_3 = max(ferritin_ng_ml)) %>%
select(upn, ferritin_day_3) %>%
left_join(df, ., by = "upn")
df <- df_labs %>%
filter(day == 5, !is.na(ferritin_ng_ml)) %>%
select(upn, day, ferritin_ng_ml) %>%
group_by(upn) %>%
dplyr::summarize(ferritin_day_5 = max(ferritin_ng_ml)) %>%
select(upn, ferritin_day_5) %>%
left_join(df, ., by = "upn")
df <- df_labs %>%
filter(day == 7, !is.na(ferritin_ng_ml)) %>%
select(upn, day, ferritin_ng_ml) %>%
group_by(upn) %>%
dplyr::summarize(ferritin_day_7 = max(ferritin_ng_ml)) %>%
select(upn, ferritin_day_7) %>%
left_join(df, ., by = "upn")
df <- df_labs %>%
filter(day %in% 0:30, !is.na(ferritin_ng_ml)) %>%
select(upn,
ferritin_ng_ml) %>%
group_by(upn) %>%
dplyr::summarize(ferritin_max = max(ferritin_ng_ml)) %>%
left_join(df, ., by = "upn")
# Add pre-LD, day +0, day +3, day +5, day +7, and peak IL-6 to df
df <- df_pre_ld %>%
select(upn, il6_pg_ml_pre_d) %>%
rename(il6_pre_ld = il6_pg_ml_pre_d) %>%
left_join(df, ., by = "upn")
df <- df_labs %>%
filter(day == 0, !is.na(il6_pg_ml)) %>%
select(upn, day, time, il6_pg_ml) %>%
group_by(upn) %>%
filter(time == min(time)) %>%
select(upn, il6_pg_ml) %>%
rename(il6_day_0 = il6_pg_ml) %>%
left_join(df, ., by = "upn")
df <- df_labs %>%
filter(day %in% 2:4, !is.na(il6_pg_ml)) %>%
select(upn, day, il6_pg_ml) %>%
group_by(upn) %>%
dplyr::summarize(il6_day_3 = max(il6_pg_ml)) %>%
select(upn, il6_day_3) %>%
left_join(df, ., by = "upn")
df <- df_labs %>%
filter(day == 5, !is.na(il6_pg_ml)) %>%
select(upn, day, il6_pg_ml) %>%
group_by(upn) %>%
dplyr::summarize(il6_day_5 = max(il6_pg_ml)) %>%
select(upn, il6_day_5) %>%
left_join(df, ., by = "upn")
df <- df_labs %>%
filter(day == 7, !is.na(il6_pg_ml)) %>%
select(upn, day, il6_pg_ml) %>%
group_by(upn) %>%
dplyr::summarize(il6_day_7 = max(il6_pg_ml)) %>%
select(upn, il6_day_7) %>%
left_join(df, ., by = "upn")
df <- df_labs %>%
filter(day %in% 0:30, !is.na(il6_pg_ml)) %>%
select(upn, il6_pg_ml) %>%
group_by(upn) %>%
dplyr::summarize(il6_max = max(il6_pg_ml)) %>%
left_join(df, ., by = "upn")
# Add pre-LD, day +0, day +3, day +5, day + 7, and minimum fibrinogen to df
df <- df_pre_ld %>%
select(upn, fibrinogen_mg_dl_pre_d) %>%
rename(fibrinogen_pre_ld = fibrinogen_mg_dl_pre_d) %>%
left_join(df, ., by = "upn")
df <- df_labs %>%
filter(day == 0, !is.na(fibrinogen_mg_dl)) %>%
select(upn, day, time, fibrinogen_mg_dl) %>%
group_by(upn) %>%
filter(time == min(time)) %>%
select(upn, fibrinogen_mg_dl) %>%
rename(fibrinogen_day_0 = fibrinogen_mg_dl) %>%
left_join(df, ., by = "upn")
df <- df_labs %>%
filter(day %in% 2:4, !is.na(fibrinogen_mg_dl)) %>%
select(upn, day, fibrinogen_mg_dl) %>%
group_by(upn) %>%
dplyr::summarize(fibrinogen_day_3 = max(fibrinogen_mg_dl)) %>%
select(upn, fibrinogen_day_3) %>%
left_join(df, ., by = "upn")
df <- df_labs %>%
filter(day == 5, !is.na(fibrinogen_mg_dl)) %>%
select(upn, day, fibrinogen_mg_dl) %>%
group_by(upn) %>%
dplyr::summarize(fibrinogen_day_5 = max(fibrinogen_mg_dl)) %>%
select(upn, fibrinogen_day_5) %>%
left_join(df, ., by = "upn")
df <- df_labs %>%
filter(day == 7, !is.na(fibrinogen_mg_dl)) %>%
select(upn, day, fibrinogen_mg_dl) %>%
group_by(upn) %>%
dplyr::summarize(fibrinogen_day_7 = max(fibrinogen_mg_dl)) %>%
select(upn, fibrinogen_day_7) %>%
left_join(df, ., by = "upn")
df <- df_labs %>%
filter(day %in% 0:30, !is.na(fibrinogen_mg_dl)) %>%
select(upn,
fibrinogen_mg_dl) %>%
group_by(upn) %>%
dplyr::summarize(fibrinogen_min = min(fibrinogen_mg_dl)) %>%
left_join(df, ., by = "upn")
# Add pre-LD, day +0, day +3, day +5, day + 7, and peak D-dimer to df
df <- df_pre_ld %>%
select(upn, d_dimer_pre_d) %>%
rename(d_dimer_pre_ld = d_dimer_pre_d) %>%
left_join(df, ., by = "upn")
df <- df_labs %>%
filter(day == 0, !is.na(d_dimer)) %>%
select(upn, day, time, d_dimer) %>%
group_by(upn) %>%
filter(time == min(time)) %>%
select(upn, d_dimer) %>%
rename(d_dimer_day_0 = d_dimer) %>%
left_join(df, ., by = "upn")
df <- df_labs %>%
filter(day %in% 2:4, !is.na(d_dimer)) %>%
select(upn, day, d_dimer) %>%
group_by(upn) %>%
dplyr::summarize(d_dimer_day_3 = max(d_dimer)) %>%
select(upn, d_dimer_day_3) %>%
left_join(df, ., by = "upn")
df <- df_labs %>%
filter(day == 5, !is.na(d_dimer)) %>%
select(upn, day, d_dimer) %>%
group_by(upn) %>%
dplyr::summarize(d_dimer_day_5 = max(d_dimer)) %>%
select(upn, d_dimer_day_5) %>%
left_join(df, ., by = "upn")
df <- df_labs %>%
filter(day == 7, !is.na(d_dimer)) %>%
select(upn, day, d_dimer) %>%
group_by(upn) %>%
dplyr::summarize(d_dimer_day_7 = max(d_dimer)) %>%
select(upn, d_dimer_day_7) %>%
left_join(df, ., by = "upn")
df <- df_labs %>%
filter(day %in% 0:30, !is.na(d_dimer)) %>%
select(upn,
d_dimer) %>%
group_by(upn) %>%
dplyr::summarize(d_dimer_max = max(d_dimer)) %>%
left_join(df, ., by = "upn")
# Add pre-LD, day +0, day +3, day +5, day + 7, and peak PTT to df
df <- df_pre_ld %>%
select(upn, ptt_patient_pre_d) %>%
rename(ptt_pre_ld = ptt_patient_pre_d) %>%
left_join(df, ., by = "upn")
df <- df_labs %>%
filter(day == 0, !is.na(ptt_patient)) %>%
select(upn, day, time, ptt_patient) %>%
group_by(upn) %>%
filter(time == min(time)) %>%
select(upn, ptt_patient) %>%
rename(ptt_day_0 = ptt_patient) %>%
left_join(df, ., by = "upn")
df <- df_labs %>%
filter(day %in% 2:4, !is.na(ptt_patient)) %>%
select(upn, day, ptt_patient) %>%
group_by(upn) %>%
dplyr::summarize(ptt_day_3 = max(ptt_patient)) %>%
select(upn, ptt_day_3) %>%
left_join(df, ., by = "upn")
df <- df_labs %>%
filter(day == 5, !is.na(ptt_patient)) %>%
select(upn, day, ptt_patient) %>%
group_by(upn) %>%
dplyr::summarize(ptt_day_5 = max(ptt_patient)) %>%
select(upn, ptt_day_5) %>%
left_join(df, ., by = "upn")
df <- df_labs %>%
filter(day == 7, !is.na(ptt_patient)) %>%
select(upn, day, ptt_patient) %>%
group_by(upn) %>%
dplyr::summarize(ptt_day_7 = max(ptt_patient)) %>%
select(upn, ptt_day_7) %>%
left_join(df, ., by = "upn")
df <- df_labs %>%
filter(day %in% 0:30, !is.na(ptt_patient)) %>%
select(upn,
ptt_patient) %>%
group_by(upn) %>%
dplyr::summarize(ptt_max = max(ptt_patient)) %>%
left_join(df, ., by = "upn")
# Log10-transform laboratory data
df <- df %>%
mutate(across(anc_pre_ld:ptt_max, ~ifelse(. == 0, 0.01, .), .names = "{.col}_log10")) %>%
mutate(across(anc_pre_ld_log10:ptt_max_log10, log10))
var_label(df$anc_pre_ld_log10) <- "Pre-LD ANC"
var_label(df$lym_pre_ld_log10) <- "Pre-LD ALC"
var_label(df$hb_pre_ld) <- "Pre-LD Hb"
var_label(df$plt_pre_ld_log10) <- "Pre-LD platelet count"
var_label(df$ldh_pre_ld_log10) <- "Pre-LD LDH"
var_label(df$crp_pre_ld_log10) <- "Pre-LD CRP"
var_label(df$crp_day_0_log10) <- "Day +0 CRP"
var_label(df$crp_day_3_log10) <- "Day +3 CRP"
var_label(df$crp_day_5_log10) <- "Day +5 CRP"
var_label(df$crp_day_7_log10) <- "Day +7 CRP"
var_label(df$crp_max_log10) <- "Peak CRP"
var_label(df$ferritin_pre_ld_log10) <- "Pre-LD ferritin"
var_label(df$ferritin_day_0_log10) <- "Day +0 ferritin"
var_label(df$ferritin_day_3_log10) <- "Day +3 ferritin"
var_label(df$ferritin_day_5_log10) <- "Day +5 ferritin"
var_label(df$ferritin_day_7_log10) <- "Day +7 ferritin"
var_label(df$ferritin_max_log10) <- "Peak ferritin"
var_label(df$il6_pre_ld_log10) <- "Pre-LD IL-6"
var_label(df$il6_day_0_log10) <- "Day +0 IL-6"
var_label(df$il6_day_3_log10) <- "Day +3 IL-6"
var_label(df$il6_day_5_log10) <- "Day +5 IL-6"
var_label(df$il6_day_7_log10) <- "Day +7 IL-6"
var_label(df$il6_max_log10) <- "Peak IL-6"
var_label(df$fibrinogen_pre_ld_log10) <- "Pre-LD fibrinogen"
var_label(df$fibrinogen_day_0_log10) <- "Day +0 fibrinogen"
var_label(df$fibrinogen_day_3_log10) <- "Day +3 fibrinogen"
var_label(df$fibrinogen_day_5_log10) <- "Day +5 fibrinogen"
var_label(df$fibrinogen_day_7_log10) <- "Day +7 fibrinogen"
var_label(df$fibrinogen_min_log10) <- "Nadir fibrinogen"
var_label(df$d_dimer_pre_ld_log10) <- "Pre-LD D-dimer"
var_label(df$d_dimer_day_0_log10) <- "Day +0 D-dimer"
var_label(df$d_dimer_day_3_log10) <- "Day +3 D-dimer"
var_label(df$d_dimer_day_5_log10) <- "Day +5 D-dimer"
var_label(df$d_dimer_day_7_log10) <- "Day +7 D-dimer"
var_label(df$d_dimer_max_log10) <- "Peak D-dimer"
var_label(df$ptt_pre_ld_log10) <- "Pre-LD PTT"
var_label(df$ptt_day_0_log10) <- "Day +0 PTT"
var_label(df$ptt_day_3_log10) <- "Day +3 PTT"
var_label(df$ptt_day_5_log10) <- "Day +5 PTT"
var_label(df$ptt_day_7_log10) <- "Day +7 PTT"
var_label(df$ptt_max_log10) <- "Peak PTT"
df %>%
select(
cart_age,
gender_desc,
racenih,
ethnih,
prior_hct,
disease_cat,
anc_pre_ld,
hb_pre_ld,
plt_pre_ld,
LD_regimen_cat,
product_infused,
product_cat
) %>%
tbl_summary(
missing_text = "Missing",
type = list(all_continuous() ~ "continuous2"),
statistic = list(
all_continuous() ~ c("{median} ({p25}, {p75})", "{min} - {max}"),
all_categorical() ~ "{n} ({p}%)"
),
digits = all_categorical() ~ 0,
sort = list(everything() ~ "frequency")
) %>%
bold_labels()
| Characteristic | N = 6911 |
|---|---|
| Age | |
| Median (IQR) | 61 (51, 68) |
| Range | 19 - 84 |
| Sex | |
| Male | 437 (63%) |
| Female | 254 (37%) |
| Race | |
| White | 583 (84%) |
| Asian | 58 (8%) |
| Black or African American | 18 (3%) |
| American Indian or Alaska Native | 13 (2%) |
| Multiple | 8 (1%) |
| Unknown | 7 (1%) |
| Native Hawaiian or other Pacific Islander | 4 (1%) |
| Ethnicity | |
| Not Hispanic/Latino or Unknown | 639 (92%) |
| Hispanic or Latino | 52 (8%) |
| Prior HCT | 234 (34%) |
| Disease | |
| Aggressive NHL | 318 (46%) |
| Indolent NHL | 154 (22%) |
| MM/PCL | 120 (17%) |
| ALL | 99 (14%) |
| Pre-LD ANC | |
| Median (IQR) | 2.84 (1.79, 4.29) |
| Range | 0.00 - 54.03 |
| Missing | 1 |
| Pre-LD Hb | |
| Median (IQR) | 10.70 (9.50, 12.35) |
| Range | 7.00 - 17.10 |
| Pre-LD platelet count | |
| Median (IQR) | 142 (89, 202) |
| Range | 6 - 790 |
| Lymphodepletion regimen | |
| Low-intensity Cy/Flu | 403 (58%) |
| High-intensity Cy/Flu | 253 (37%) |
| Other | 35 (5%) |
| CAR T-cell product | |
| JCAR014 | 193 (28%) |
| YESCARTA | 140 (20%) |
| BREYANZI | 87 (13%) |
| CARVYKTI | 52 (8%) |
| TECARTUS | 47 (7%) |
| Investigational CD20 product | 45 (7%) |
| JCAR021 | 44 (6%) |
| Investigational BCMA product | 38 (5%) |
| ABECMA | 30 (4%) |
| KYMRIAH | 15 (2%) |
| CAR T-cell product category | |
| Investigational CAR T-cell product | 320 (46%) |
| Commercial CD19 CAR T-cell product with CD28 costimulatory domain (axi-cel, brexu-cel) | 187 (27%) |
| Commercial CD19 CAR T-cell product with 4-1BB costimulatory domain (liso-cel, tisa-cel) | 102 (15%) |
| Commercial BCMA CAR T-cell product (cilta-cel, ide-cel) | 82 (12%) |
| 1 n (%) | |
df %>%
select(icaht_grade) %>%
tbl_summary(
statistic = list(all_categorical() ~ "{n} ({p}%)"),
digits = all_categorical() ~ 0,
sort = list(everything() ~ "frequency")
) %>%
bold_labels()
| Characteristic | N = 6911 |
|---|---|
| ICAHT grade | |
| Grade 1 | 366 (53%) |
| Grade 0 | 112 (16%) |
| Grade 2 | 111 (16%) |
| Grade 3 | 67 (10%) |
| Grade 4 | 35 (5%) |
| 1 n (%) | |
df %>%
select(crs_grade, icans_grade) %>%
tbl_summary(statistic = list(all_categorical() ~ "{n} ({p}%)"))
| Characteristic | N = 6911 |
|---|---|
| CRS grade | |
| 0 | 175 (25%) |
| 1 | 224 (32%) |
| 2 | 257 (37%) |
| 3 | 23 (3.3%) |
| 4 | 10 (1.4%) |
| 5 | 2 (0.3%) |
| ICANS grade | |
| 0 | 411 (59%) |
| 1 | 72 (10%) |
| 2 | 84 (12%) |
| 3 | 103 (15%) |
| 4 | 19 (2.7%) |
| 5 | 2 (0.3%) |
| 1 n (%) | |
df <- df %>%
mutate(
OS_code = ifelse(is.na(deathdat), 0, 1),
OS_months = as.duration(cart_date %--% datelast) / dmonths(1)
)
# Create data frame with OS landmarked at day +30
df_landmark_OS <- df %>%
filter(OS_months > 30/30.4167) %>%
mutate(OS_months_landmark = OS_months - 30/30.4167)
quantile(prodlim(Hist(OS_months, OS_code) ~ 1, data = df, reverse = TRUE))
Quantiles of the potential follow up time distribution based on the Kaplan-Meier method
applied to the censored times reversing the roles of event status and censored.
Table of quantiles and corresponding confidence limits:
q quantile lower upper
<num> <num> <num> <num>
1: 0.00 NA NA NA
2: 0.25 63.28 57.46 72.18
3: 0.50 35.12 28.85 38.21
4: 0.75 12.25 11.33 13.86
5: 1.00 0.92 0.92 0.99
Median time (IQR):35.12 (12.25;63.28)
Call: survfit(formula = Surv(OS_months, OS_code) ~ 1, data = df)
n events median 0.95LCL 0.95UCL
[1,] 691 320 29.4 24.2 38.6
# 3-year estimate
summary(surv_OS, times = 36)
Call: survfit(formula = Surv(OS_months, OS_code) ~ 1, data = df)
time n.risk n.event survival std.err lower 95% CI upper 95% CI
36 154 285 0.465 0.023 0.422 0.512
Using log10-transformed laboratory values, except pre-LD hemoglobin due to lack of convergence with log10(pre-LD hemoglobin)
tbl_uvregression <- df %>%
select(
icaht_grade_3_4,
cart_age,
gender_desc,
racenih_cat,
ethnih_cat,
prior_hct,
disease_cat,
LD_regimen_cat,
LD_regimen_low_CyFlu_vs_other,
car_target,
costim_domain,
crs_grade,
icans_grade,
anc_pre_ld_log10,
hb_pre_ld,
plt_pre_ld_log10,
ldh_pre_ld_log10,
crp_pre_ld_log10,
crp_day_0_log10,
crp_day_3_log10,
crp_day_5_log10,
crp_day_7_log10,
crp_max_log10,
ferritin_pre_ld_log10,
ferritin_day_0_log10,
ferritin_day_3_log10,
ferritin_day_5_log10,
ferritin_day_7_log10,
ferritin_max_log10,
il6_pre_ld_log10,
il6_day_0_log10,
il6_day_3_log10,
il6_day_5_log10,
il6_day_7_log10,
il6_max_log10,
fibrinogen_pre_ld_log10,
fibrinogen_day_0_log10,
fibrinogen_day_3_log10,
fibrinogen_day_5_log10,
fibrinogen_day_7_log10,
fibrinogen_min_log10,
d_dimer_pre_ld_log10,
d_dimer_day_0_log10,
d_dimer_day_3_log10,
d_dimer_day_5_log10,
d_dimer_day_7_log10,
d_dimer_max_log10,
ptt_pre_ld_log10,
ptt_day_0_log10,
ptt_day_3_log10,
ptt_day_5_log10,
ptt_day_7_log10,
ptt_max_log10
) %>%
tbl_uvregression(
method = glm,
y = icaht_grade_3_4,
method.args = list(family = binomial),
exponentiate = TRUE,
pvalue_fun = ~ style_pvalue(.x, digits = 2)
) %>%
add_nevent() %>%
bold_p() %>%
bold_labels()
tbl_uvregression
| Characteristic | N | Event N | OR1 | 95% CI1 | p-value |
|---|---|---|---|---|---|
| Age | 691 | 102 | 0.97 | 0.96, 0.98 | <0.001 |
| Sex | 691 | 102 | |||
| Female | — | — | |||
| Male | 0.84 | 0.55, 1.30 | 0.44 | ||
| Race | 691 | 102 | |||
| White | — | — | |||
| Non-White | 0.76 | 0.39, 1.37 | 0.39 | ||
| Ethnicity | 691 | 102 | |||
| Hispanic or Latino | — | — | |||
| Not Hispanic/Latino or Unknown | 0.55 | 0.28, 1.12 | 0.083 | ||
| Prior HCT | 691 | 102 | |||
| No | — | — | |||
| Yes | 1.25 | 0.80, 1.92 | 0.31 | ||
| Disease | 691 | 102 | |||
| Indolent NHL | — | — | |||
| Aggressive NHL | 0.88 | 0.49, 1.62 | 0.67 | ||
| ALL | 4.06 | 2.18, 7.76 | <0.001 | ||
| MM/PCL | 0.79 | 0.36, 1.68 | 0.55 | ||
| Lymphodepletion regimen | 691 | 102 | |||
| Low-intensity Cy/Flu | — | — | |||
| High-intensity Cy/Flu | 1.45 | 0.93, 2.25 | 0.10 | ||
| Other | 2.44 | 1.03, 5.35 | 0.031 | ||
| Lymphodepletion regimen (low-dose Cy/Flu vs. other) | 691 | 102 | |||
| Low-intensity Cy/Flu | — | — | |||
| Other | 1.56 | 1.02, 2.38 | 0.040 | ||
| CAR target | 691 | 102 | |||
| BCMA | — | — | |||
| CD19 | 1.78 | 0.98, 3.54 | 0.076 | ||
| CD20 | 0.64 | 0.14, 2.15 | 0.51 | ||
| Costimulatory domain | 691 | 102 | |||
| 4-1BB | — | — | |||
| CD28 | 1.02 | 0.63, 1.62 | 0.93 | ||
| CD28-41BB | 0.40 | 0.09, 1.13 | 0.13 | ||
| CRS grade | 691 | 102 | 2.01 | 1.60, 2.56 | <0.001 |
| ICANS grade | 691 | 102 | 1.51 | 1.29, 1.76 | <0.001 |
| Pre-LD ANC | 690 | 102 | 0.15 | 0.08, 0.24 | <0.001 |
| Pre-LD Hb | 691 | 102 | 0.61 | 0.52, 0.70 | <0.001 |
| Pre-LD platelet count | 691 | 102 | 0.06 | 0.03, 0.11 | <0.001 |
| Pre-LD LDH | 677 | 101 | 9.95 | 4.72, 21.5 | <0.001 |
| Pre-LD CRP | 439 | 58 | 3.44 | 2.24, 5.43 | <0.001 |
| Day +0 CRP | 644 | 85 | 3.24 | 2.21, 4.84 | <0.001 |
| Day +3 CRP | 643 | 91 | 3.54 | 2.30, 5.65 | <0.001 |
| Day +5 CRP | 444 | 81 | 3.36 | 2.10, 5.57 | <0.001 |
| Day +7 CRP | 549 | 77 | 3.73 | 2.36, 6.08 | <0.001 |
| Peak CRP | 683 | 98 | 16.7 | 7.91, 38.4 | <0.001 |
| Pre-LD ferritin | 486 | 69 | 5.57 | 3.42, 9.51 | <0.001 |
| Day +0 ferritin | 658 | 90 | 9.87 | 5.82, 17.5 | <0.001 |
| Day +3 ferritin | 649 | 94 | 9.24 | 5.65, 15.8 | <0.001 |
| Day +5 ferritin | 445 | 80 | 6.74 | 4.17, 11.5 | <0.001 |
| Day +7 ferritin | 555 | 81 | 5.49 | 3.65, 8.57 | <0.001 |
| Peak ferritin | 691 | 102 | 6.90 | 4.80, 10.2 | <0.001 |
| Pre-LD IL-6 | 130 | 14 | 4.13 | 1.58, 11.4 | 0.004 |
| Day +0 IL-6 | 336 | 41 | 2.95 | 1.68, 5.24 | <0.001 |
| Day +3 IL-6 | 476 | 71 | 1.44 | 1.11, 1.86 | 0.005 |
| Day +5 IL-6 | 356 | 60 | 1.56 | 1.18, 2.07 | 0.002 |
| Day +7 IL-6 | 413 | 59 | 2.13 | 1.53, 2.99 | <0.001 |
| Peak IL-6 | 551 | 79 | 2.60 | 2.00, 3.43 | <0.001 |
| Pre-LD fibrinogen | 378 | 60 | 3.87 | 0.53, 29.4 | 0.19 |
| Day +0 fibrinogen | 606 | 79 | 7.19 | 1.13, 46.4 | 0.037 |
| Day +3 fibrinogen | 613 | 85 | 3.85 | 0.66, 23.2 | 0.14 |
| Day +5 fibrinogen | 404 | 69 | 0.80 | 0.16, 4.14 | 0.79 |
| Day +7 fibrinogen | 508 | 70 | 0.68 | 0.17, 2.78 | 0.58 |
| Nadir fibrinogen | 661 | 94 | 0.07 | 0.03, 0.18 | <0.001 |
| Pre-LD D-dimer | 341 | 57 | 2.97 | 1.50, 5.92 | 0.002 |
| Day +0 D-dimer | 557 | 72 | 4.85 | 2.65, 9.00 | <0.001 |
| Day +3 D-dimer | 561 | 79 | 4.20 | 2.41, 7.41 | <0.001 |
| Day +5 D-dimer | 373 | 63 | 4.01 | 2.21, 7.44 | <0.001 |
| Day +7 D-dimer | 455 | 60 | 3.58 | 2.09, 6.19 | <0.001 |
| Peak D-dimer | 651 | 92 | 4.69 | 3.00, 7.49 | <0.001 |
| Pre-LD PTT | 455 | 71 | 5.04 | 0.18, 114 | 0.32 |
| Day +0 PTT | 502 | 69 | 7.63 | 0.43, 111 | 0.14 |
| Day +3 PTT | 494 | 74 | 7.22 | 1.05, 45.2 | 0.038 |
| Day +5 PTT | 298 | 55 | 4.70 | 0.78, 26.0 | 0.079 |
| Day +7 PTT | 402 | 59 | 4.81 | 0.67, 30.0 | 0.10 |
| Peak PTT | 606 | 91 | 9.09 | 2.90, 27.9 | <0.001 |
| 1 OR = Odds Ratio, CI = Confidence Interval | |||||
df_regression <- tbl_uvregression$table_body %>%
mutate(color = case_when(estimate < 1 & p.value < 0.05 ~ "Low",
estimate > 1 & p.value < 0.05 ~ "High",
.default = "Other"),
color = as.factor(color))
vp <-
ggplot(data = df_regression, aes(x = estimate, y = -log2(p.value), color = color)) +
geom_point() +
scale_color_manual(values = c("red", "darkgreen", "darkgray")) +
geom_hline(yintercept = -log2(0.05), color = "black", linetype = "dashed") +
geom_hline(yintercept = -log2(0.01), color = "black", linetype = "dashed") +
geom_vline(xintercept = 1, color = "black", linetype = "dashed") +
geom_text(aes(x = 19, y = 3, label = "p = 0.05"), color = "black", stat = "unique") +
geom_text(aes(x = 19, y = 8, label = "p = 0.01"), color = "black", stat = "unique") +
geom_text(aes(x = 2, y = 75, label = "OR = 1"), color = "black", stat = "unique") +
scale_x_continuous(name = "Odds of grade 3-4 ICAHT", limits = c(0, 20)) +
coord_cartesian(clip = "off") +
theme(legend.position = "none")
ggplotly(vp)
options(ggrepel.max.overlaps = Inf)
vp + geom_text_repel(data = filter(df_regression, p.value < 0.05),
aes(label = var_label))
# Impute missing data
df_car_hematotox <- df %>%
select(upn,
icaht_grade_3_4,
anc_pre_ld,
hb_pre_ld,
plt_pre_ld,
crp_pre_ld,
ferritin_pre_ld)
recipe <- recipe(icaht_grade_3_4 ~ ., data = df_car_hematotox) %>%
update_role(upn, new_role = "ID") %>%
update_role_requirements("ID", bake = FALSE) %>%
step_impute_bag(anc_pre_ld,
hb_pre_ld,
plt_pre_ld,
crp_pre_ld,
ferritin_pre_ld,
seed_val = 100)
# Extract transformed dataframe
df_car_hematotox <- recipe %>%
prep() %>%
bake(new_data = NULL)
# Calculate CAR-HEMATOTOX score (continuous)
df_car_hematotox <- df_car_hematotox %>%
mutate(
plt_hematotox_score = case_when(
plt_pre_ld > 175 ~ 0,
plt_pre_ld %in% c(75:175) ~ 1,
plt_pre_ld < 75 ~ 2,
is.na(plt_pre_ld) ~ NA_real_
),
anc_hematotox_score = case_when(
anc_pre_ld > 1.2 ~ 0,
anc_pre_ld < 1.2 ~ 1,
is.na(anc_pre_ld) ~ NA_real_
),
hb_hematotox_score = case_when(hb_pre_ld > 9 ~ 0,
hb_pre_ld < 9 ~ 1,
is.na(hb_pre_ld) ~ NA_real_),
crp_hematotox_score = case_when(crp_pre_ld < 3 ~ 0,
crp_pre_ld > 3 ~ 1,
is.na(crp_pre_ld) ~ NA_real_),
ferritin_hematotox_score = case_when(
ferritin_pre_ld < 650 ~ 0,
ferritin_pre_ld %in% c(650:2000) ~ 1,
ferritin_pre_ld > 2000 ~ 2,
is.na(ferritin_pre_ld) ~ NA_real_
)
)
# Calculate CAR-HEMATOTOX score (categorical)
df_car_hematotox <- df_car_hematotox %>%
mutate(
car_hematotox_score = rowSums(across(
plt_hematotox_score:ferritin_hematotox_score
)),
car_hematotox_categorical = case_when(car_hematotox_score < 2 ~ "Low",
car_hematotox_score >= 2 ~ "High"),
car_hematotox_categorical = as.factor(car_hematotox_categorical),
car_hematotox_categorical = fct_relevel(car_hematotox_categorical, c("Low", "High"))
)
var_label(df_car_hematotox$car_hematotox_score) <- "CAR-HEMATOTOX score (continuous)"
var_label(df_car_hematotox$car_hematotox_categorical) <- "CAR-HEMATOTOX score (categorical)"
df_matrix <- df_car_hematotox %>%
select(upn, icaht_grade_3_4, car_hematotox_categorical) %>%
mutate(car_hematotox_high = ifelse(car_hematotox_categorical == "High", 1, 0)) %>%
drop_na()
expected_value <- factor(df_matrix$icaht_grade_3_4, levels = c("1", "0"))
predicted_value <- factor(df_matrix$car_hematotox_high, levels = c("1", "0"))
matrix <-
confusionMatrix(data = predicted_value,
reference = expected_value,
positive = "1")
matrix
Confusion Matrix and Statistics
Reference
Prediction 1 0
1 86 301
0 5 234
Accuracy : 0.5112
95% CI : (0.4713, 0.551)
No Information Rate : 0.8546
P-Value [Acc > NIR] : 1
Kappa : 0.1628
Mcnemar's Test P-Value : <2e-16
Sensitivity : 0.9451
Specificity : 0.4374
Pos Pred Value : 0.2222
Neg Pred Value : 0.9791
Prevalence : 0.1454
Detection Rate : 0.1374
Detection Prevalence : 0.6182
Balanced Accuracy : 0.6912
'Positive' Class : 1
# Set Cox-Snell R squared = 0.1
# 6 predictors
pmsampsize(
type = "b",
nagrsquared = NA,
csrsquared = 0.1,
rsquared = NA,
parameters = 6,
shrinkage = 0.9,
prevalence = 0.16,
cstatistic = NA,
seed = 123,
rate = NA,
timepoint = NA,
meanfup = NA,
intercept = NA,
sd = NA,
mmoe = 1.1)
NB: Assuming 0.05 acceptable difference in apparent & adjusted R-squared
NB: Assuming 0.05 margin of error in estimation of intercept
NB: Events per Predictor Parameter (EPP) assumes prevalence = 0.16
Samp_size Shrinkage Parameter CS_Rsq Max_Rsq Nag_Rsq EPP
Criteria 1 510 0.900 6 0.1 0.585 0.171 13.60
Criteria 2 192 0.774 6 0.1 0.585 0.171 5.12
Criteria 3 207 0.900 6 0.1 0.585 0.171 5.52
Final 510 0.900 6 0.1 0.585 0.171 13.60
Minimum sample size required for new model development based on user inputs = 510,
with 82 events (assuming an outcome prevalence = 0.16) and an EPP = 13.6
Create training and tests sets (70%/30%)
# Create data frame for tidy model
df_tidy <- df %>%
mutate(
disease_cat = ifelse(disease_cat == "ALL", "ALL", "Other"),
disease_cat = factor(disease_cat, levels = c("ALL", "Other"))
) %>%
select(
upn,
icaht_grade_3_4,
disease_cat,
LD_regimen_low_CyFlu_vs_other,
anc_pre_ld_log10,
plt_pre_ld_log10,
ldh_pre_ld_log10,
ferritin_pre_ld_log10,
ferritin_day_0_log10,
ferritin_day_3_log10,
crp_day_3_log10,
d_dimer_day_3_log10
) %>%
mutate(
icaht_grade_3_4 = as.factor(icaht_grade_3_4),
icaht_grade_3_4 = fct_relevel(icaht_grade_3_4, c("0", "1"))
)
# Split df into training set and testing set
set.seed(100)
df_split <-
initial_split(df_tidy, prop = 0.7, strata = icaht_grade_3_4)
df_train <- training(df_split)
df_test <- testing(df_split)
df_train %>%
count(icaht_grade_3_4) %>%
mutate(prop = n / sum(n))
# A tibble: 2 × 3
# Rowwise:
icaht_grade_3_4 n prop
<fct> <int> <dbl>
1 0 412 1
2 1 71 1
# A tibble: 2 × 3
# Rowwise:
icaht_grade_3_4 n prop
<fct> <int> <dbl>
1 0 177 1
2 1 31 1
Build and train logistic regression model with LASSO regularization
# Create model recipe to pre-process data
recipe <- recipe(icaht_grade_3_4 ~ ., data = df_train) %>%
update_role(upn, new_role = "ID") %>% # Assign upn to identifier role (as opposed to predictor or outcome)
update_role_requirements("ID", bake = FALSE) %>%
step_impute_bag(
# Impute missing values (default is to use all other predictors)
disease_cat,
LD_regimen_low_CyFlu_vs_other,
anc_pre_ld_log10,
plt_pre_ld_log10,
ldh_pre_ld_log10,
ferritin_pre_ld_log10,
ferritin_day_0_log10,
ferritin_day_3_log10,
crp_day_3_log10,
d_dimer_day_3_log10,
seed_val = 100
) %>%
# step_zv(all_numeric(), -all_outcomes()) %>% # Remove variables containing only a single value
step_normalize(all_numeric(), -all_outcomes()) %>% # Normalize numeric data
step_dummy(disease_cat, LD_regimen_low_CyFlu_vs_other) # Create dummy variables for disease_cat (nominal to numeric binary)
# Build model specification
spec_model <- logistic_reg(penalty = 0.1, mixture = 1) %>%
set_engine("glmnet")
# Create model formula
model_formula <-
icaht_grade_3_4 ~ disease_cat_Other + anc_pre_ld_log10 + plt_pre_ld_log10 + ldh_pre_ld_log10 + ferritin_pre_ld_log10
# Create model workflow (bundles recipe and model)
wf <- workflow() %>%
add_model(spec_model, formula = model_formula) %>%
add_recipe(recipe)
# Fit model on training data
fit_test <- wf %>%
fit(data = df_train)
# Extract estimated coefficients
fit_test %>%
extract_fit_parsnip() %>%
tidy()
# A tibble: 6 × 3
term estimate penalty
<chr> <dbl> <dbl>
1 (Intercept) -1.77 0.1
2 disease_cat_Other 0 0.1
3 anc_pre_ld_log10 -0.0668 0.1
4 plt_pre_ld_log10 -0.135 0.1
5 ldh_pre_ld_log10 0 0.1
6 ferritin_pre_ld_log10 0.00617 0.1
Tune LASSO parameters
# Build set of bootstrap resamples
set.seed(123)
bootstrap_resamples <-
bootstraps(df_train, strata = icaht_grade_3_4)
# Create tuning specifications
# Set penalty = tune() instead of to a single value
tune_spec <- logistic_reg(penalty = tune(), mixture = 1) %>%
set_engine("glmnet")
# Use penalty() to set up an appropriate grid
lambda_grid <- grid_regular(penalty(), levels = 5)
# Tune grid using workflow object
doParallel::registerDoParallel()
wf_tune <- workflow() %>%
add_model(tune_spec, formula = model_formula) %>%
add_recipe(recipe)
set.seed(2023)
lasso_grid <- tune_grid(wf_tune,
resamples = bootstrap_resamples,
grid = lambda_grid)
# lasso_grid <- tune_grid(
# wf %>% update_model(tune_spec, formula = model_formula),
# resamples = bootstrap_resamples,
# grid = lambda_grid
# )
lasso_grid %>% collect_metrics()
# A tibble: 15 × 7
penalty .metric .estimator mean n std_err .config
<dbl> <chr> <chr> <dbl> <int> <dbl> <chr>
1 0.0000000001 accuracy binary 0.874 25 0.00299 Preproces…
2 0.0000000001 brier_class binary 0.0999 25 0.00189 Preproces…
3 0.0000000001 roc_auc binary 0.777 25 0.00826 Preproces…
4 0.0000000316 accuracy binary 0.874 25 0.00299 Preproces…
5 0.0000000316 brier_class binary 0.0999 25 0.00189 Preproces…
6 0.0000000316 roc_auc binary 0.777 25 0.00826 Preproces…
7 0.00001 accuracy binary 0.874 25 0.00299 Preproces…
8 0.00001 brier_class binary 0.0999 25 0.00189 Preproces…
9 0.00001 roc_auc binary 0.777 25 0.00826 Preproces…
10 0.00316 accuracy binary 0.875 25 0.00293 Preproces…
11 0.00316 brier_class binary 0.0997 25 0.00186 Preproces…
12 0.00316 roc_auc binary 0.778 25 0.00829 Preproces…
13 1 accuracy binary 0.855 25 0.00293 Preproces…
14 1 brier_class binary 0.124 25 0.00207 Preproces…
15 1 roc_auc binary 0.5 25 0 Preproces…
# Visualize performance with regularization parameter
lasso_grid %>%
collect_metrics() %>%
ggplot(aes(x = penalty, y = mean, color = .metric)) +
geom_errorbar(aes(ymin = mean - std_err, ymax = mean + std_err), alpha = 0.5) +
geom_line(linewidth = 1.5) +
facet_wrap(~ .metric, scales = "free", nrow = 2) +
scale_x_log10() +
theme(legend.position = "none")
# Select highest AUROC
highest_auroc <- lasso_grid %>%
select_best(metric = "roc_auc")
# Finalize workflow
wf_final <-
finalize_workflow(wf_tune,
highest_auroc)
# Visualize most important variables
wf_final %>%
fit(data = df_train) %>%
pull_workflow_fit() %>%
vi(lambda = highest_auroc$penalty) %>%
mutate(Importance = abs(Importance),
Variable = fct_reorder(Variable, Importance)) %>%
ggplot(aes(x = Importance, y = Variable, fill = Sign)) +
geom_col() +
scale_x_continuous(expand = c(0, 0)) +
labs(y = NULL)
Fit final best model on training set and evaluate on test set
# Fit the final best model to the training set and evaluate the test set
final_fit <- last_fit(wf_final,
df_split)
# Extract trained workflow from final_fit
wf_final_trained <- final_fit %>%
extract_workflow()
wf_final_trained
══ Workflow [trained] ════════════════════════════════════════════════
Preprocessor: Recipe
Model: logistic_reg()
── Preprocessor ──────────────────────────────────────────────────────
3 Recipe Steps
• step_impute_bag()
• step_normalize()
• step_dummy()
── Model ─────────────────────────────────────────────────────────────
Call: glmnet::glmnet(x = maybe_matrix(x), y = y, family = "binomial", alpha = ~1)
Df %Dev Lambda
1 0 0.00 0.123300
2 2 2.56 0.112400
3 2 4.94 0.102400
4 3 7.08 0.093280
5 4 9.27 0.084990
6 4 11.39 0.077440
7 4 13.12 0.070560
8 4 14.56 0.064300
9 4 15.77 0.058580
10 4 16.79 0.053380
11 4 17.66 0.048640
12 4 18.40 0.044320
13 4 19.04 0.040380
14 4 19.58 0.036790
15 4 20.05 0.033520
16 4 20.45 0.030550
17 4 20.79 0.027830
18 4 21.08 0.025360
19 5 21.33 0.023110
20 5 21.56 0.021050
21 5 21.76 0.019180
22 5 21.92 0.017480
23 5 22.07 0.015930
24 5 22.19 0.014510
25 5 22.29 0.013220
26 5 22.38 0.012050
27 5 22.45 0.010980
28 5 22.51 0.010000
29 5 22.56 0.009114
30 5 22.61 0.008304
31 5 22.64 0.007566
32 5 22.68 0.006894
33 5 22.70 0.006282
34 5 22.72 0.005724
35 5 22.74 0.005215
36 5 22.76 0.004752
37 5 22.77 0.004330
38 5 22.78 0.003945
39 5 22.79 0.003595
40 5 22.80 0.003275
41 5 22.80 0.002984
42 5 22.81 0.002719
43 5 22.81 0.002478
44 5 22.82 0.002258
45 5 22.82 0.002057
46 5 22.82 0.001874
...
and 6 more lines.
# saveRDS(wf_final_trained, "eIPMPre.rds")
# Extract trained model from final_fit
final_fit_mod <- final_fit %>%
extract_fit_parsnip()
final_fit_mod
parsnip model object
Call: glmnet::glmnet(x = maybe_matrix(x), y = y, family = "binomial", alpha = ~1)
Df %Dev Lambda
1 0 0.00 0.123300
2 2 2.56 0.112400
3 2 4.94 0.102400
4 3 7.08 0.093280
5 4 9.27 0.084990
6 4 11.39 0.077440
7 4 13.12 0.070560
8 4 14.56 0.064300
9 4 15.77 0.058580
10 4 16.79 0.053380
11 4 17.66 0.048640
12 4 18.40 0.044320
13 4 19.04 0.040380
14 4 19.58 0.036790
15 4 20.05 0.033520
16 4 20.45 0.030550
17 4 20.79 0.027830
18 4 21.08 0.025360
19 5 21.33 0.023110
20 5 21.56 0.021050
21 5 21.76 0.019180
22 5 21.92 0.017480
23 5 22.07 0.015930
24 5 22.19 0.014510
25 5 22.29 0.013220
26 5 22.38 0.012050
27 5 22.45 0.010980
28 5 22.51 0.010000
29 5 22.56 0.009114
30 5 22.61 0.008304
31 5 22.64 0.007566
32 5 22.68 0.006894
33 5 22.70 0.006282
34 5 22.72 0.005724
35 5 22.74 0.005215
36 5 22.76 0.004752
37 5 22.77 0.004330
38 5 22.78 0.003945
39 5 22.79 0.003595
40 5 22.80 0.003275
41 5 22.80 0.002984
42 5 22.81 0.002719
43 5 22.81 0.002478
44 5 22.82 0.002258
45 5 22.82 0.002057
46 5 22.82 0.001874
47 5 22.83 0.001708
48 5 22.83 0.001556
49 5 22.83 0.001418
50 5 22.83 0.001292
51 5 22.83 0.001177
52 5 22.83 0.001073
# A tibble: 6 × 3
term estimate penalty
<chr> <dbl> <dbl>
1 (Intercept) -1.98 0.00316
2 disease_cat_Other -0.207 0.00316
3 anc_pre_ld_log10 -0.534 0.00316
4 plt_pre_ld_log10 -0.278 0.00316
5 ldh_pre_ld_log10 0.477 0.00316
6 ferritin_pre_ld_log10 0.460 0.00316
# # Test model against Shiny app output
# df_test <- tibble(
# disease_cat = "ALL",
# LD_regimen_low_CyFlu_vs_other = NA,
# anc_pre_ld = 1.5,
# plt_pre_ld = 175,
# ldh_pre_ld = 250,
# ferritin_pre_ld = 300,
# ferritin_day_0 = NA,
# ferritin_day_3 = NA,
# crp_day_3 = NA,
# d_dimer_day_3 = NA
# )
#
# df_test <- df_test %>%
# mutate(
# across(anc_pre_ld:d_dimer_day_3, ~ ifelse(. == 0, 0.01, .), .names = "{.col}_log10"),
# across(anc_pre_ld_log10:d_dimer_day_3_log10, log10)
# )
#
# predict(wf_final_trained, df_test, type = "prob")
# Compute model-specific variable importance scores for fitted model
final_fit %>%
extract_fit_parsnip() %>%
vip()
# Evaluate final model on test set
final_fit %>% collect_metrics()
# A tibble: 3 × 4
.metric .estimator .estimate .config
<chr> <chr> <dbl> <chr>
1 accuracy binary 0.885 Preprocessor1_Model1
2 roc_auc binary 0.873 Preprocessor1_Model1
3 brier_class binary 0.0868 Preprocessor1_Model1
# Plot calibration curve for test set using {runway}
# Augment df_test (i.e., add columns for predictions to df_train)
df_mod_pre_ferritin_pre_ld <- wf_final_trained %>%
augment(new_data = df_test) %>%
mutate(.pred_class = as.numeric(as.character(.pred_class)))
p_cal_pre_ferritin_pre_ld <- cal_plot(
df_mod_pre_ferritin_pre_ld,
outcome = "icaht_grade_3_4",
positive = "1",
prediction = ".pred_1",
n_bins = 0,
show_loess = TRUE,
plot_title = "eIPMPre"
)
p_cal_pre_ferritin_pre_ld
Method: maximize_metric
Predictor: .pred_1
Outcome: icaht_grade_3_4
Direction: >=
AUC n n_pos n_neg
0.873 208 31 177
optimal_cutpoint youden acc sensitivity specificity tp fn fp tn
0.1176 0.6191 0.7212 0.9355 0.6836 29 2 56 121
Predictor summary:
Data Min. 5% 1st Qu. Median Mean
Overall 0.01479182 0.02326008 0.04465773 0.09131790 0.1567422
0 0.01479182 0.02211389 0.03930081 0.07594596 0.1143297
1 0.05917070 0.11217653 0.17613996 0.29002852 0.3989035
3rd Qu. 95% Max. SD NAs
0.1948782 0.6011735 0.9946637 0.1795614 0
0.1519761 0.3298866 0.6286539 0.1082159 0
0.6252787 0.9019418 0.9946637 0.2873314 0
Build and train logistic regression model with LASSO regularization
# Create model formula
model_formula <-
icaht_grade_3_4 ~ disease_cat_Other + LD_regimen_low_CyFlu_vs_other_Other + anc_pre_ld_log10 +
plt_pre_ld_log10 + ldh_pre_ld_log10 + ferritin_pre_ld_log10
# Create model workflow (bundles recipe and model)
wf <- workflow() %>%
add_model(spec_model, formula = model_formula) %>%
add_recipe(recipe)
# Fit model on training set
fit_test <- wf %>%
fit(data = df_train)
# Extract estimated coefficients
fit_test %>%
extract_fit_parsnip() %>%
tidy()
# A tibble: 7 × 3
term estimate penalty
<chr> <dbl> <dbl>
1 (Intercept) -1.77 0.1
2 disease_cat_Other 0 0.1
3 LD_regimen_low_CyFlu_vs_other_Other 0 0.1
4 anc_pre_ld_log10 -0.0668 0.1
5 plt_pre_ld_log10 -0.135 0.1
6 ldh_pre_ld_log10 0 0.1
7 ferritin_pre_ld_log10 0.00617 0.1
Tune LASSO parameters
# Tune grid using workflow object
doParallel::registerDoParallel()
wf_tune <- workflow() %>%
add_model(tune_spec, formula = model_formula) %>%
add_recipe(recipe)
set.seed(2023)
lasso_grid <- tune_grid(wf_tune,
resamples = bootstrap_resamples,
grid = lambda_grid)
lasso_grid %>% collect_metrics()
# A tibble: 15 × 7
penalty .metric .estimator mean n std_err .config
<dbl> <chr> <chr> <dbl> <int> <dbl> <chr>
1 0.0000000001 accuracy binary 0.875 25 0.00304 Preproces…
2 0.0000000001 brier_class binary 0.0965 25 0.00187 Preproces…
3 0.0000000001 roc_auc binary 0.794 25 0.00790 Preproces…
4 0.0000000316 accuracy binary 0.875 25 0.00304 Preproces…
5 0.0000000316 brier_class binary 0.0965 25 0.00187 Preproces…
6 0.0000000316 roc_auc binary 0.794 25 0.00790 Preproces…
7 0.00001 accuracy binary 0.875 25 0.00304 Preproces…
8 0.00001 brier_class binary 0.0965 25 0.00187 Preproces…
9 0.00001 roc_auc binary 0.794 25 0.00790 Preproces…
10 0.00316 accuracy binary 0.876 25 0.00306 Preproces…
11 0.00316 brier_class binary 0.0965 25 0.00184 Preproces…
12 0.00316 roc_auc binary 0.792 25 0.00802 Preproces…
13 1 accuracy binary 0.855 25 0.00293 Preproces…
14 1 brier_class binary 0.124 25 0.00207 Preproces…
15 1 roc_auc binary 0.5 25 0 Preproces…
# Visualize performance with regularization parameter
lasso_grid %>%
collect_metrics() %>%
ggplot(aes(x = penalty, y = mean, color = .metric)) +
geom_errorbar(aes(ymin = mean - std_err, ymax = mean + std_err), alpha = 0.5) +
geom_line(linewidth = 1.5) +
facet_wrap(~ .metric, scales = "free", nrow = 2) +
scale_x_log10() +
theme(legend.position = "none")
# Select highest accuracy
highest_auroc <- lasso_grid %>%
select_best(metric = "roc_auc")
# Finalize workflow
wf_final <-
finalize_workflow(wf_tune,
highest_auroc)
# Visualize most important variables
wf_final %>%
fit(data = df_train) %>%
pull_workflow_fit() %>%
vi(lambda = highest_auroc$penalty) %>%
mutate(Importance = abs(Importance),
Variable = fct_reorder(Variable, Importance)) %>%
ggplot(aes(x = Importance, y = Variable, fill = Sign)) +
geom_col() +
scale_x_continuous(expand = c(0, 0)) +
labs(y = NULL)
Fit final best model on training set and evaluate on test set
# Fit the final best model to the training set and evaluate the test set
final_fit <- last_fit(wf_final,
df_split)
# Extract trained workflow from final_fit
wf_final_trained <- final_fit %>%
extract_workflow()
wf_final_trained
══ Workflow [trained] ════════════════════════════════════════════════
Preprocessor: Recipe
Model: logistic_reg()
── Preprocessor ──────────────────────────────────────────────────────
3 Recipe Steps
• step_impute_bag()
• step_normalize()
• step_dummy()
── Model ─────────────────────────────────────────────────────────────
Call: glmnet::glmnet(x = maybe_matrix(x), y = y, family = "binomial", alpha = ~1)
Df %Dev Lambda
1 0 0.00 0.123300
2 2 2.56 0.112400
3 2 4.94 0.102400
4 3 7.08 0.093280
5 4 9.27 0.084990
6 4 11.39 0.077440
7 4 13.12 0.070560
8 4 14.56 0.064300
9 4 15.77 0.058580
10 4 16.79 0.053380
11 4 17.66 0.048640
12 4 18.40 0.044320
13 5 19.27 0.040380
14 5 20.07 0.036790
15 5 20.76 0.033520
16 5 21.36 0.030550
17 5 21.87 0.027830
18 5 22.31 0.025360
19 6 22.71 0.023110
20 6 23.06 0.021050
21 6 23.35 0.019180
22 6 23.60 0.017480
23 6 23.82 0.015930
24 6 24.00 0.014510
25 6 24.16 0.013220
26 6 24.29 0.012050
27 6 24.41 0.010980
28 6 24.50 0.010000
29 6 24.58 0.009114
30 6 24.65 0.008304
31 6 24.71 0.007566
32 6 24.76 0.006894
33 6 24.80 0.006282
34 6 24.83 0.005724
35 6 24.86 0.005215
36 6 24.89 0.004752
37 6 24.91 0.004330
38 6 24.92 0.003945
39 6 24.94 0.003595
40 6 24.95 0.003275
41 6 24.96 0.002984
42 6 24.97 0.002719
43 6 24.98 0.002478
44 6 24.98 0.002258
45 6 24.99 0.002057
46 6 24.99 0.001874
...
and 8 more lines.
# Extract trained model from final_fit
final_fit_mod <- final_fit %>%
extract_fit_parsnip()
final_fit_mod
parsnip model object
Call: glmnet::glmnet(x = maybe_matrix(x), y = y, family = "binomial", alpha = ~1)
Df %Dev Lambda
1 0 0.00 0.123300
2 2 2.56 0.112400
3 2 4.94 0.102400
4 3 7.08 0.093280
5 4 9.27 0.084990
6 4 11.39 0.077440
7 4 13.12 0.070560
8 4 14.56 0.064300
9 4 15.77 0.058580
10 4 16.79 0.053380
11 4 17.66 0.048640
12 4 18.40 0.044320
13 5 19.27 0.040380
14 5 20.07 0.036790
15 5 20.76 0.033520
16 5 21.36 0.030550
17 5 21.87 0.027830
18 5 22.31 0.025360
19 6 22.71 0.023110
20 6 23.06 0.021050
21 6 23.35 0.019180
22 6 23.60 0.017480
23 6 23.82 0.015930
24 6 24.00 0.014510
25 6 24.16 0.013220
26 6 24.29 0.012050
27 6 24.41 0.010980
28 6 24.50 0.010000
29 6 24.58 0.009114
30 6 24.65 0.008304
31 6 24.71 0.007566
32 6 24.76 0.006894
33 6 24.80 0.006282
34 6 24.83 0.005724
35 6 24.86 0.005215
36 6 24.89 0.004752
37 6 24.91 0.004330
38 6 24.92 0.003945
39 6 24.94 0.003595
40 6 24.95 0.003275
41 6 24.96 0.002984
42 6 24.97 0.002719
43 6 24.98 0.002478
44 6 24.98 0.002258
45 6 24.99 0.002057
46 6 24.99 0.001874
47 6 24.99 0.001708
48 6 25.00 0.001556
49 6 25.00 0.001418
50 6 25.00 0.001292
51 6 25.00 0.001177
52 6 25.00 0.001073
53 6 25.00 0.000977
54 6 25.01 0.000890
# A tibble: 7 × 3
term estimate penalty
<chr> <dbl> <dbl>
1 (Intercept) -2.40 0.0000000001
2 disease_cat_Other -0.276 0.0000000001
3 LD_regimen_low_CyFlu_vs_other_Other 0.885 0.0000000001
4 anc_pre_ld_log10 -0.546 0.0000000001
5 plt_pre_ld_log10 -0.377 0.0000000001
6 ldh_pre_ld_log10 0.461 0.0000000001
7 ferritin_pre_ld_log10 0.451 0.0000000001
# Evaluate final model on test set
final_fit %>% collect_metrics()
# A tibble: 3 × 4
.metric .estimator .estimate .config
<chr> <chr> <dbl> <chr>
1 accuracy binary 0.885 Preprocessor1_Model1
2 roc_auc binary 0.840 Preprocessor1_Model1
3 brier_class binary 0.0906 Preprocessor1_Model1
# Plot calibration curve for test set
# Using {probably}
final_fit %>%
collect_predictions() %>%
cal_plot_breaks(
truth = icaht_grade_3_4,
estimate = .pred_1,
event_level = "second",
num_breaks = 5
)
# Using {runway}
# Augment df_test (i.e., add columns for predictions to df_train)
df_mod_pre_ferritin_pre_ld_LD_regimen <- wf_final_trained %>%
augment(new_data = df_test) %>%
mutate(.pred_class = as.numeric(as.character(.pred_class)))
p_cal_pre_ferritin_pre_ld_LD_regimen <- cal_plot(
df_mod_pre_ferritin_pre_ld_LD_regimen,
outcome = "icaht_grade_3_4",
positive = "1",
prediction = ".pred_1",
n_bins = 0,
show_loess = TRUE,
plot_title = "eIPMPre (+ LD regimen)"
)
p_cal_pre_ferritin_pre_ld_LD_regimen
Method: maximize_metric
Predictor: .pred_1
Outcome: icaht_grade_3_4
Direction: >=
AUC n n_pos n_neg
0.8402 208 31 177
optimal_cutpoint youden acc sensitivity specificity tp fn fp tn
0.1315 0.5619 0.7404 0.8387 0.7232 26 5 49 128
Predictor summary:
Data Min. 5% 1st Qu. Median Mean
Overall 0.01060906 0.01585109 0.03924637 0.08963944 0.1546460
0 0.01060906 0.01439415 0.03395278 0.07364204 0.1123256
1 0.03593784 0.07432365 0.13772072 0.27499010 0.3962817
3rd Qu. 95% Max. SD NAs
0.1747355 0.5596489 0.9966204 0.1895385 0
0.1435229 0.3445912 0.7379206 0.1168601 0
0.6503889 0.9099296 0.9966204 0.3112291 0
df_mod_pre_ferritin_pre_ld_LD_regimen <- df_mod_pre_ferritin_pre_ld_LD_regimen %>%
mutate(icaht_grade_3_4 = fct_relevel(icaht_grade_3_4, c("0", "1")))
dcurves::dca(
icaht_grade_3_4 ~ .pred_1,
data = df_mod_pre_ferritin_pre_ld_LD_regimen,
thresholds = seq(0, 0.35, by = 0.01),
label = list(.pred_1 = "eIPMPre")
) %>%
plot(smooth = TRUE)
Build and train logistic regression model with LASSO regularization
# Create model formula
model_formula <-
icaht_grade_3_4 ~ disease_cat_Other + anc_pre_ld_log10 + plt_pre_ld_log10 + ldh_pre_ld_log10 + ferritin_day_3_log10
# Create model workflow (bundles recipe and model)
wf <- workflow() %>%
add_model(spec_model, formula = model_formula) %>%
add_recipe(recipe)
# Fit model on training data
fit_test <- wf %>%
fit(data = df_train)
# Extract estimated coefficients
fit_test %>%
extract_fit_parsnip() %>%
tidy()
# A tibble: 6 × 3
term estimate penalty
<chr> <dbl> <dbl>
1 (Intercept) -1.78 0.1
2 disease_cat_Other 0 0.1
3 anc_pre_ld_log10 -0.0683 0.1
4 plt_pre_ld_log10 -0.0710 0.1
5 ldh_pre_ld_log10 0 0.1
6 ferritin_day_3_log10 0.156 0.1
Tune LASSO parameters
# Tune grid using workflow object
doParallel::registerDoParallel()
wf_tune <- workflow() %>%
add_model(tune_spec, formula = model_formula) %>%
add_recipe(recipe)
set.seed(2023)
lasso_grid <- tune_grid(wf_tune,
resamples = bootstrap_resamples,
grid = lambda_grid)
lasso_grid %>% collect_metrics()
# A tibble: 15 × 7
penalty .metric .estimator mean n std_err .config
<dbl> <chr> <chr> <dbl> <int> <dbl> <chr>
1 0.0000000001 accuracy binary 0.873 25 0.00397 Preproces…
2 0.0000000001 brier_class binary 0.0973 25 0.00213 Preproces…
3 0.0000000001 roc_auc binary 0.814 25 0.00761 Preproces…
4 0.0000000316 accuracy binary 0.873 25 0.00397 Preproces…
5 0.0000000316 brier_class binary 0.0973 25 0.00213 Preproces…
6 0.0000000316 roc_auc binary 0.814 25 0.00761 Preproces…
7 0.00001 accuracy binary 0.873 25 0.00397 Preproces…
8 0.00001 brier_class binary 0.0973 25 0.00213 Preproces…
9 0.00001 roc_auc binary 0.814 25 0.00761 Preproces…
10 0.00316 accuracy binary 0.875 25 0.00332 Preproces…
11 0.00316 brier_class binary 0.0970 25 0.00208 Preproces…
12 0.00316 roc_auc binary 0.814 25 0.00763 Preproces…
13 1 accuracy binary 0.855 25 0.00293 Preproces…
14 1 brier_class binary 0.124 25 0.00207 Preproces…
15 1 roc_auc binary 0.5 25 0 Preproces…
# Visualize performance with regularization parameter
lasso_grid %>%
collect_metrics() %>%
ggplot(aes(x = penalty, y = mean, color = .metric)) +
geom_errorbar(aes(ymin = mean - std_err, ymax = mean + std_err), alpha = 0.5) +
geom_line(linewidth = 1.5) +
facet_wrap(~ .metric, scales = "free", nrow = 2) +
scale_x_log10() +
theme(legend.position = "none")
# Select highest AUROC
highest_auroc <- lasso_grid %>%
select_best(metric = "roc_auc")
# Finalize workflow
wf_final <-
finalize_workflow(wf_tune,
highest_auroc)
# Visualize most important variables
wf_final %>%
fit(data = df_train) %>%
pull_workflow_fit() %>%
vi(lambda = highest_auroc$penalty) %>%
mutate(Importance = abs(Importance),
Variable = fct_reorder(Variable, Importance)) %>%
ggplot(aes(x = Importance, y = Variable, fill = Sign)) +
geom_col() +
scale_x_continuous(expand = c(0, 0)) +
labs(y = NULL)
Fit final best model on training set and evaluate on test set
# Fit the final best model to the training set and evaluate the test set
final_fit <- last_fit(wf_final,
df_split)
# Extract trained workflow from final_fit
wf_final_trained <- final_fit %>%
extract_workflow()
wf_final_trained
══ Workflow [trained] ════════════════════════════════════════════════
Preprocessor: Recipe
Model: logistic_reg()
── Preprocessor ──────────────────────────────────────────────────────
3 Recipe Steps
• step_impute_bag()
• step_normalize()
• step_dummy()
── Model ─────────────────────────────────────────────────────────────
Call: glmnet::glmnet(x = maybe_matrix(x), y = y, family = "binomial", alpha = ~1)
Df %Dev Lambda
1 0 0.00 0.125700
2 3 3.24 0.114500
3 3 6.65 0.104300
4 3 9.32 0.095060
5 3 11.47 0.086610
6 4 13.36 0.078920
7 4 15.28 0.071910
8 4 16.88 0.065520
9 4 18.21 0.059700
10 4 19.34 0.054400
11 4 20.30 0.049560
12 4 21.12 0.045160
13 4 21.82 0.041150
14 4 22.43 0.037490
15 4 22.94 0.034160
16 4 23.39 0.031130
17 4 23.77 0.028360
18 4 24.09 0.025840
19 4 24.37 0.023550
20 4 24.61 0.021450
21 4 24.82 0.019550
22 5 25.00 0.017810
23 5 25.16 0.016230
24 5 25.30 0.014790
25 5 25.42 0.013470
26 5 25.51 0.012280
27 5 25.60 0.011190
28 5 25.67 0.010190
29 5 25.73 0.009287
30 5 25.78 0.008462
31 5 25.82 0.007710
32 5 25.86 0.007025
33 5 25.89 0.006401
34 5 25.92 0.005833
35 5 25.94 0.005315
36 5 25.96 0.004842
37 5 25.97 0.004412
38 5 25.98 0.004020
39 5 26.00 0.003663
40 5 26.00 0.003338
41 5 26.01 0.003041
42 5 26.02 0.002771
43 5 26.02 0.002525
44 5 26.03 0.002301
45 5 26.03 0.002096
46 5 26.03 0.001910
...
and 7 more lines.
# saveRDS(wf_final_trained, "eIPMPost.rds")
# Extract trained model from final_fit
final_fit_mod <- final_fit %>%
extract_fit_parsnip()
final_fit_mod
parsnip model object
Call: glmnet::glmnet(x = maybe_matrix(x), y = y, family = "binomial", alpha = ~1)
Df %Dev Lambda
1 0 0.00 0.125700
2 3 3.24 0.114500
3 3 6.65 0.104300
4 3 9.32 0.095060
5 3 11.47 0.086610
6 4 13.36 0.078920
7 4 15.28 0.071910
8 4 16.88 0.065520
9 4 18.21 0.059700
10 4 19.34 0.054400
11 4 20.30 0.049560
12 4 21.12 0.045160
13 4 21.82 0.041150
14 4 22.43 0.037490
15 4 22.94 0.034160
16 4 23.39 0.031130
17 4 23.77 0.028360
18 4 24.09 0.025840
19 4 24.37 0.023550
20 4 24.61 0.021450
21 4 24.82 0.019550
22 5 25.00 0.017810
23 5 25.16 0.016230
24 5 25.30 0.014790
25 5 25.42 0.013470
26 5 25.51 0.012280
27 5 25.60 0.011190
28 5 25.67 0.010190
29 5 25.73 0.009287
30 5 25.78 0.008462
31 5 25.82 0.007710
32 5 25.86 0.007025
33 5 25.89 0.006401
34 5 25.92 0.005833
35 5 25.94 0.005315
36 5 25.96 0.004842
37 5 25.97 0.004412
38 5 25.98 0.004020
39 5 26.00 0.003663
40 5 26.00 0.003338
41 5 26.01 0.003041
42 5 26.02 0.002771
43 5 26.02 0.002525
44 5 26.03 0.002301
45 5 26.03 0.002096
46 5 26.03 0.001910
47 5 26.04 0.001740
48 5 26.04 0.001586
49 5 26.04 0.001445
50 5 26.04 0.001316
51 5 26.04 0.001199
52 5 26.04 0.001093
53 5 26.05 0.000996
# A tibble: 6 × 3
term estimate penalty
<chr> <dbl> <dbl>
1 (Intercept) -2.10 0.00316
2 disease_cat_Other -0.181 0.00316
3 anc_pre_ld_log10 -0.547 0.00316
4 plt_pre_ld_log10 -0.204 0.00316
5 ldh_pre_ld_log10 0.448 0.00316
6 ferritin_day_3_log10 0.752 0.00316
# # Test model against Shiny app output
# df_test <- tibble(
# disease_cat = "ALL",
# LD_regimen_low_CyFlu_vs_other = NA,
# anc_pre_ld = 1.5,
# plt_pre_ld = 175,
# ldh_pre_ld = 250,
# ferritin_pre_ld = NA,
# ferritin_day_0 = NA,
# ferritin_day_3 = 300,
# crp_day_3 = NA,
# d_dimer_day_3 = NA
# )
#
# df_test <- df_test %>%
# mutate(
# across(anc_pre_ld:d_dimer_day_3, ~ ifelse(. == 0, 0.01, .), .names = "{.col}_log10"),
# across(anc_pre_ld_log10:d_dimer_day_3_log10, log10)
# )
#
# predict(wf_final_trained, df_test, type = "prob")
# Compute model-specific variable importance scores for fitted model
final_fit %>%
extract_fit_parsnip() %>%
vip()
# Evaluate final model on test set
final_fit %>% collect_metrics()
# A tibble: 3 × 4
.metric .estimator .estimate .config
<chr> <chr> <dbl> <chr>
1 accuracy binary 0.889 Preprocessor1_Model1
2 roc_auc binary 0.879 Preprocessor1_Model1
3 brier_class binary 0.0846 Preprocessor1_Model1
# Plot calibration curve for test set
# Using {probably}
final_fit %>%
collect_predictions() %>%
cal_plot_breaks(
truth = icaht_grade_3_4,
estimate = .pred_1,
event_level = "second",
num_breaks = 5
)
# Using {runway}
# Augment df_test (i.e., add columns for predictions to df_train)
df_mod_post_ferritin_only <- wf_final_trained %>%
augment(new_data = df_test) %>%
mutate(.pred_class = as.numeric(as.character(.pred_class)))
p_cal_post_ferritin_only <- cal_plot(
df_mod_post_ferritin_only,
outcome = "icaht_grade_3_4",
positive = "1",
prediction = ".pred_1",
n_bins = 0,
show_loess = TRUE,
plot_title = "eIPMPost"
)
p_cal_post_ferritin_only
Method: maximize_metric
Predictor: .pred_1
Outcome: icaht_grade_3_4
Direction: >=
AUC n n_pos n_neg
0.8788 208 31 177
optimal_cutpoint youden acc sensitivity specificity tp fn fp tn
0.1534 0.6127 0.7837 0.8387 0.774 26 5 40 137
Predictor summary:
Data Min. 5% 1st Qu. Median Mean
Overall 0.006755491 0.01693932 0.03906943 0.09193796 0.1566729
0 0.006755491 0.01619801 0.03537370 0.07525395 0.1110887
1 0.050928947 0.10428795 0.17604793 0.29682932 0.4169440
3rd Qu. 95% Max. SD NAs
0.1940067 0.5855342 0.9967191 0.1883115 0
0.1356604 0.3218302 0.6542548 0.1112023 0
0.6370743 0.9626319 0.9967191 0.2997944 0
Build and train logistic regression model with LASSO regularization
# Create model formula
model_formula <-
icaht_grade_3_4 ~ disease_cat_Other + anc_pre_ld_log10 + plt_pre_ld_log10 + ldh_pre_ld_log10 +
ferritin_day_3_log10 + crp_day_3_log10
# Create model workflow (bundles recipe and model)
wf <- workflow() %>%
add_model(spec_model, formula = model_formula) %>%
add_recipe(recipe)
# Fit model on training data
fit_test <- wf %>%
fit(data = df_train)
# Extract estimated coefficients
fit_test %>%
extract_fit_parsnip() %>%
tidy()
# A tibble: 7 × 3
term estimate penalty
<chr> <dbl> <dbl>
1 (Intercept) -1.78 0.1
2 disease_cat_Other 0 0.1
3 anc_pre_ld_log10 -0.0683 0.1
4 plt_pre_ld_log10 -0.0710 0.1
5 ldh_pre_ld_log10 0 0.1
6 ferritin_day_3_log10 0.156 0.1
7 crp_day_3_log10 0 0.1
Tune LASSO parameters
# Tune grid using workflow object
doParallel::registerDoParallel()
wf_tune <- workflow() %>%
add_model(tune_spec, formula = model_formula) %>%
add_recipe(recipe)
set.seed(2023)
lasso_grid <- tune_grid(wf_tune,
resamples = bootstrap_resamples,
grid = lambda_grid)
lasso_grid %>% collect_metrics()
# A tibble: 15 × 7
penalty .metric .estimator mean n std_err .config
<dbl> <chr> <chr> <dbl> <int> <dbl> <chr>
1 0.0000000001 accuracy binary 0.873 25 0.00341 Preproces…
2 0.0000000001 brier_class binary 0.0967 25 0.00208 Preproces…
3 0.0000000001 roc_auc binary 0.822 25 0.00747 Preproces…
4 0.0000000316 accuracy binary 0.873 25 0.00341 Preproces…
5 0.0000000316 brier_class binary 0.0967 25 0.00208 Preproces…
6 0.0000000316 roc_auc binary 0.822 25 0.00747 Preproces…
7 0.00001 accuracy binary 0.873 25 0.00341 Preproces…
8 0.00001 brier_class binary 0.0967 25 0.00208 Preproces…
9 0.00001 roc_auc binary 0.822 25 0.00747 Preproces…
10 0.00316 accuracy binary 0.874 25 0.00321 Preproces…
11 0.00316 brier_class binary 0.0964 25 0.00202 Preproces…
12 0.00316 roc_auc binary 0.822 25 0.00760 Preproces…
13 1 accuracy binary 0.855 25 0.00293 Preproces…
14 1 brier_class binary 0.124 25 0.00207 Preproces…
15 1 roc_auc binary 0.5 25 0 Preproces…
# Visualize performance with regularization parameter
lasso_grid %>%
collect_metrics() %>%
ggplot(aes(x = penalty, y = mean, color = .metric)) +
geom_errorbar(aes(ymin = mean - std_err, ymax = mean + std_err), alpha = 0.5) +
geom_line(linewidth = 1.5) +
facet_wrap(~ .metric, scales = "free", nrow = 2) +
scale_x_log10() +
theme(legend.position = "none")
# Select highest AUROC
highest_auroc <- lasso_grid %>%
select_best(metric = "roc_auc")
# Finalize workflow
wf_final <-
finalize_workflow(wf_tune,
highest_auroc)
# Visualize most important variables
wf_final %>%
fit(data = df_train) %>%
pull_workflow_fit() %>%
vi(lambda = highest_auroc$penalty) %>%
mutate(Importance = abs(Importance),
Variable = fct_reorder(Variable, Importance)) %>%
ggplot(aes(x = Importance, y = Variable, fill = Sign)) +
geom_col() +
scale_x_continuous(expand = c(0, 0)) +
labs(y = NULL)
Fit final best model on training set and evaluate on test set
# Fit the final best model to the training set and evaluate the test set
final_fit <- last_fit(wf_final,
df_split)
# Extract trained workflow from final_fit
wf_final_trained <- final_fit %>%
extract_workflow()
wf_final_trained
══ Workflow [trained] ════════════════════════════════════════════════
Preprocessor: Recipe
Model: logistic_reg()
── Preprocessor ──────────────────────────────────────────────────────
3 Recipe Steps
• step_impute_bag()
• step_normalize()
• step_dummy()
── Model ─────────────────────────────────────────────────────────────
Call: glmnet::glmnet(x = maybe_matrix(x), y = y, family = "binomial", alpha = ~1)
Df %Dev Lambda
1 0 0.00 0.125700
2 3 3.24 0.114500
3 3 6.65 0.104300
4 3 9.32 0.095060
5 3 11.47 0.086610
6 4 13.36 0.078920
7 4 15.28 0.071910
8 4 16.88 0.065520
9 4 18.21 0.059700
10 4 19.34 0.054400
11 5 20.41 0.049560
12 5 21.39 0.045160
13 5 22.24 0.041150
14 5 22.97 0.037490
15 5 23.60 0.034160
16 5 24.15 0.031130
17 5 24.62 0.028360
18 5 25.02 0.025840
19 5 25.37 0.023550
20 6 25.70 0.021450
21 6 25.98 0.019550
22 6 26.23 0.017810
23 6 26.44 0.016230
24 6 26.62 0.014790
25 6 26.77 0.013470
26 6 26.90 0.012280
27 6 27.01 0.011190
28 6 27.11 0.010190
29 6 27.19 0.009287
30 6 27.26 0.008462
31 6 27.32 0.007710
32 6 27.37 0.007025
33 6 27.41 0.006401
34 6 27.44 0.005833
35 6 27.47 0.005315
36 6 27.50 0.004842
37 6 27.52 0.004412
38 6 27.54 0.004020
39 6 27.55 0.003663
40 6 27.56 0.003338
41 6 27.57 0.003041
42 6 27.58 0.002771
43 6 27.59 0.002525
44 6 27.60 0.002301
45 6 27.60 0.002096
46 6 27.61 0.001910
...
and 8 more lines.
# Extract trained model from final_fit
final_fit_mod <- final_fit %>%
extract_fit_parsnip()
final_fit_mod
parsnip model object
Call: glmnet::glmnet(x = maybe_matrix(x), y = y, family = "binomial", alpha = ~1)
Df %Dev Lambda
1 0 0.00 0.125700
2 3 3.24 0.114500
3 3 6.65 0.104300
4 3 9.32 0.095060
5 3 11.47 0.086610
6 4 13.36 0.078920
7 4 15.28 0.071910
8 4 16.88 0.065520
9 4 18.21 0.059700
10 4 19.34 0.054400
11 5 20.41 0.049560
12 5 21.39 0.045160
13 5 22.24 0.041150
14 5 22.97 0.037490
15 5 23.60 0.034160
16 5 24.15 0.031130
17 5 24.62 0.028360
18 5 25.02 0.025840
19 5 25.37 0.023550
20 6 25.70 0.021450
21 6 25.98 0.019550
22 6 26.23 0.017810
23 6 26.44 0.016230
24 6 26.62 0.014790
25 6 26.77 0.013470
26 6 26.90 0.012280
27 6 27.01 0.011190
28 6 27.11 0.010190
29 6 27.19 0.009287
30 6 27.26 0.008462
31 6 27.32 0.007710
32 6 27.37 0.007025
33 6 27.41 0.006401
34 6 27.44 0.005833
35 6 27.47 0.005315
36 6 27.50 0.004842
37 6 27.52 0.004412
38 6 27.54 0.004020
39 6 27.55 0.003663
40 6 27.56 0.003338
41 6 27.57 0.003041
42 6 27.58 0.002771
43 6 27.59 0.002525
44 6 27.60 0.002301
45 6 27.60 0.002096
46 6 27.61 0.001910
47 6 27.61 0.001740
48 6 27.61 0.001586
49 6 27.61 0.001445
50 6 27.62 0.001316
51 6 27.62 0.001199
52 6 27.62 0.001093
53 6 27.62 0.000996
54 6 27.62 0.000907
# A tibble: 7 × 3
term estimate penalty
<chr> <dbl> <dbl>
1 (Intercept) -2.07 0.0000000001
2 disease_cat_Other -0.334 0.0000000001
3 anc_pre_ld_log10 -0.561 0.0000000001
4 plt_pre_ld_log10 -0.250 0.0000000001
5 ldh_pre_ld_log10 0.432 0.0000000001
6 ferritin_day_3_log10 0.553 0.0000000001
7 crp_day_3_log10 0.511 0.0000000001
# Evaluate final model on test set
final_fit %>% collect_metrics()
# A tibble: 3 × 4
.metric .estimator .estimate .config
<chr> <chr> <dbl> <chr>
1 accuracy binary 0.889 Preprocessor1_Model1
2 roc_auc binary 0.867 Preprocessor1_Model1
3 brier_class binary 0.0881 Preprocessor1_Model1
# Plot calibration curve for test set
# Using {probably}
final_fit %>%
collect_predictions() %>%
cal_plot_breaks(
truth = icaht_grade_3_4,
estimate = .pred_1,
event_level = "second",
num_breaks = 5
)
# Using {runway}
# Augment df_test (i.e., add columns for predictions to df_train)
df_mod_post_crp <- wf_final_trained %>%
augment(new_data = df_test) %>%
mutate(.pred_class = as.numeric(as.character(.pred_class)))
p_cal_post_crp <- cal_plot(
df_mod_post_crp,
outcome = "icaht_grade_3_4",
positive = "1",
prediction = ".pred_1",
n_bins = 0,
show_loess = TRUE,
plot_title = "eIPMPost (+ day +3 CRP)"
)
p_cal_post_crp
Method: maximize_metric
Predictor: .pred_1
Outcome: icaht_grade_3_4
Direction: >=
AUC n n_pos n_neg
0.8666 208 31 177
optimal_cutpoint youden acc sensitivity specificity tp fn fp tn
0.2227 0.6063 0.8462 0.7419 0.8644 23 8 24 153
Predictor summary:
Data Min. 5% 1st Qu. Median Mean
Overall 0.004469136 0.01111110 0.03726027 0.08967302 0.1564522
0 0.004469136 0.01074218 0.03214660 0.06861536 0.1116378
1 0.049842877 0.10034928 0.19044590 0.27900757 0.4123281
3rd Qu. 95% Max. SD NAs
0.1814052 0.6403928 0.9971253 0.1941113 0
0.1453168 0.3434017 0.7347372 0.1217198 0
0.6948509 0.9706088 0.9971253 0.3058815 0
Build and train logistic regression model with LASSO regularization
# Create model formula
model_formula <-
icaht_grade_3_4 ~ disease_cat_Other + anc_pre_ld_log10 + plt_pre_ld_log10 + ldh_pre_ld_log10 +
ferritin_day_3_log10 + d_dimer_day_3_log10
# Create model workflow (bundles recipe and model)
wf <- workflow() %>%
add_model(spec_model, formula = model_formula) %>%
add_recipe(recipe)
# Fit model on training data
fit_test <- wf %>%
fit(data = df_train)
# Extract estimated coefficients
fit_test %>%
extract_fit_parsnip() %>%
tidy()
# A tibble: 7 × 3
term estimate penalty
<chr> <dbl> <dbl>
1 (Intercept) -1.78 0.1
2 disease_cat_Other 0 0.1
3 anc_pre_ld_log10 -0.0683 0.1
4 plt_pre_ld_log10 -0.0710 0.1
5 ldh_pre_ld_log10 0 0.1
6 ferritin_day_3_log10 0.156 0.1
7 d_dimer_day_3_log10 0 0.1
Tune LASSO parameters
# Tune grid using workflow object
doParallel::registerDoParallel()
wf_tune <- workflow() %>%
add_model(tune_spec, formula = model_formula) %>%
add_recipe(recipe)
set.seed(2023)
lasso_grid <- tune_grid(wf_tune,
resamples = bootstrap_resamples,
grid = lambda_grid)
lasso_grid %>% collect_metrics()
# A tibble: 15 × 7
penalty .metric .estimator mean n std_err .config
<dbl> <chr> <chr> <dbl> <int> <dbl> <chr>
1 0.0000000001 accuracy binary 0.874 25 0.00325 Preproces…
2 0.0000000001 brier_class binary 0.0979 25 0.00215 Preproces…
3 0.0000000001 roc_auc binary 0.807 25 0.00797 Preproces…
4 0.0000000316 accuracy binary 0.874 25 0.00325 Preproces…
5 0.0000000316 brier_class binary 0.0979 25 0.00215 Preproces…
6 0.0000000316 roc_auc binary 0.807 25 0.00797 Preproces…
7 0.00001 accuracy binary 0.874 25 0.00325 Preproces…
8 0.00001 brier_class binary 0.0979 25 0.00215 Preproces…
9 0.00001 roc_auc binary 0.807 25 0.00797 Preproces…
10 0.00316 accuracy binary 0.874 25 0.00325 Preproces…
11 0.00316 brier_class binary 0.0975 25 0.00209 Preproces…
12 0.00316 roc_auc binary 0.808 25 0.00800 Preproces…
13 1 accuracy binary 0.855 25 0.00293 Preproces…
14 1 brier_class binary 0.124 25 0.00207 Preproces…
15 1 roc_auc binary 0.5 25 0 Preproces…
# Visualize performance with regularization parameter
lasso_grid %>%
collect_metrics() %>%
ggplot(aes(x = penalty, y = mean, color = .metric)) +
geom_errorbar(aes(ymin = mean - std_err, ymax = mean + std_err), alpha = 0.5) +
geom_line(linewidth = 1.5) +
facet_wrap(~ .metric, scales = "free", nrow = 2) +
scale_x_log10() +
theme(legend.position = "none")
# Select highest AUROC
highest_auroc <- lasso_grid %>%
select_best(metric = "roc_auc")
# Finalize workflow
wf_final <-
finalize_workflow(wf_tune,
highest_auroc)
# Visualize most important variables
wf_final %>%
fit(data = df_train) %>%
pull_workflow_fit() %>%
vi(lambda = highest_auroc$penalty) %>%
mutate(Importance = abs(Importance),
Variable = fct_reorder(Variable, Importance)) %>%
ggplot(aes(x = Importance, y = Variable, fill = Sign)) +
geom_col() +
scale_x_continuous(expand = c(0, 0)) +
labs(y = NULL)
Fit final best model on training set and evaluate on test set
# Fit the final best model to the training set and evaluate the test set
final_fit <- last_fit(wf_final,
df_split)
# Extract trained workflow from final_fit
wf_final_trained <- final_fit %>%
extract_workflow()
wf_final_trained
══ Workflow [trained] ════════════════════════════════════════════════
Preprocessor: Recipe
Model: logistic_reg()
── Preprocessor ──────────────────────────────────────────────────────
3 Recipe Steps
• step_impute_bag()
• step_normalize()
• step_dummy()
── Model ─────────────────────────────────────────────────────────────
Call: glmnet::glmnet(x = maybe_matrix(x), y = y, family = "binomial", alpha = ~1)
Df %Dev Lambda
1 0 0.00 0.125700
2 3 3.24 0.114500
3 3 6.65 0.104300
4 3 9.32 0.095060
5 3 11.47 0.086610
6 4 13.36 0.078920
7 4 15.28 0.071910
8 4 16.88 0.065520
9 4 18.21 0.059700
10 4 19.34 0.054400
11 4 20.30 0.049560
12 4 21.12 0.045160
13 4 21.82 0.041150
14 4 22.43 0.037490
15 4 22.94 0.034160
16 5 23.40 0.031130
17 5 23.81 0.028360
18 5 24.17 0.025840
19 5 24.47 0.023550
20 5 24.72 0.021450
21 6 24.96 0.019550
22 6 25.17 0.017810
23 6 25.35 0.016230
24 6 25.50 0.014790
25 6 25.63 0.013470
26 6 25.74 0.012280
27 6 25.84 0.011190
28 6 25.92 0.010190
29 6 25.98 0.009287
30 6 26.04 0.008462
31 6 26.09 0.007710
32 6 26.13 0.007025
33 6 26.16 0.006401
34 6 26.19 0.005833
35 6 26.22 0.005315
36 6 26.24 0.004842
37 6 26.25 0.004412
38 6 26.27 0.004020
39 6 26.28 0.003663
40 6 26.29 0.003338
41 6 26.30 0.003041
42 6 26.31 0.002771
43 6 26.31 0.002525
44 6 26.32 0.002301
45 6 26.32 0.002096
46 6 26.32 0.001910
...
and 7 more lines.
# Extract trained model from final_fit
final_fit_mod <- final_fit %>%
extract_fit_parsnip()
final_fit_mod
parsnip model object
Call: glmnet::glmnet(x = maybe_matrix(x), y = y, family = "binomial", alpha = ~1)
Df %Dev Lambda
1 0 0.00 0.125700
2 3 3.24 0.114500
3 3 6.65 0.104300
4 3 9.32 0.095060
5 3 11.47 0.086610
6 4 13.36 0.078920
7 4 15.28 0.071910
8 4 16.88 0.065520
9 4 18.21 0.059700
10 4 19.34 0.054400
11 4 20.30 0.049560
12 4 21.12 0.045160
13 4 21.82 0.041150
14 4 22.43 0.037490
15 4 22.94 0.034160
16 5 23.40 0.031130
17 5 23.81 0.028360
18 5 24.17 0.025840
19 5 24.47 0.023550
20 5 24.72 0.021450
21 6 24.96 0.019550
22 6 25.17 0.017810
23 6 25.35 0.016230
24 6 25.50 0.014790
25 6 25.63 0.013470
26 6 25.74 0.012280
27 6 25.84 0.011190
28 6 25.92 0.010190
29 6 25.98 0.009287
30 6 26.04 0.008462
31 6 26.09 0.007710
32 6 26.13 0.007025
33 6 26.16 0.006401
34 6 26.19 0.005833
35 6 26.22 0.005315
36 6 26.24 0.004842
37 6 26.25 0.004412
38 6 26.27 0.004020
39 6 26.28 0.003663
40 6 26.29 0.003338
41 6 26.30 0.003041
42 6 26.31 0.002771
43 6 26.31 0.002525
44 6 26.32 0.002301
45 6 26.32 0.002096
46 6 26.32 0.001910
47 6 26.33 0.001740
48 6 26.33 0.001586
49 6 26.33 0.001445
50 6 26.33 0.001316
51 6 26.33 0.001199
52 6 26.34 0.001093
53 6 26.34 0.000996
# A tibble: 7 × 3
term estimate penalty
<chr> <dbl> <dbl>
1 (Intercept) -2.01 0.00316
2 disease_cat_Other -0.271 0.00316
3 anc_pre_ld_log10 -0.546 0.00316
4 plt_pre_ld_log10 -0.205 0.00316
5 ldh_pre_ld_log10 0.434 0.00316
6 ferritin_day_3_log10 0.650 0.00316
7 d_dimer_day_3_log10 0.161 0.00316
# Evaluate final model on test set
final_fit %>% collect_metrics()
# A tibble: 3 × 4
.metric .estimator .estimate .config
<chr> <chr> <dbl> <chr>
1 accuracy binary 0.885 Preprocessor1_Model1
2 roc_auc binary 0.877 Preprocessor1_Model1
3 brier_class binary 0.0853 Preprocessor1_Model1
# Plot calibration curve for test set
# Using {probably}
final_fit %>%
collect_predictions() %>%
cal_plot_breaks(
truth = icaht_grade_3_4,
estimate = .pred_1,
event_level = "second",
num_breaks = 5
)
# Using {runway}
# Augment df_test (i.e., add columns for predictions to df_train)
df_mod_post_d_dimer <- wf_final_trained %>%
augment(new_data = df_test) %>%
mutate(.pred_class = as.numeric(as.character(.pred_class)))
p_cal_post_d_dimer <- cal_plot(
df_mod_post_d_dimer,
outcome = "icaht_grade_3_4",
positive = "1",
prediction = ".pred_1",
n_bins = 0,
show_loess = TRUE,
plot_title = "eIPMPost (+ day +3 D-dimer)"
)
p_cal_post_d_dimer
Method: maximize_metric
Predictor: .pred_1
Outcome: icaht_grade_3_4
Direction: >=
AUC n n_pos n_neg
0.8772 208 31 177
optimal_cutpoint youden acc sensitivity specificity tp fn fp tn
0.2171 0.6176 0.8558 0.7419 0.8757 23 8 22 155
Predictor summary:
Data Min. 5% 1st Qu. Median Mean
Overall 0.007766116 0.01750785 0.03889264 0.08848150 0.1573556
0 0.007766116 0.01738958 0.03607295 0.07234603 0.1120083
1 0.044616841 0.10484003 0.18758762 0.30055495 0.4162737
3rd Qu. 95% Max. SD NAs
0.1836877 0.5839416 0.9969261 0.1891879 0
0.1511257 0.3616618 0.6807138 0.1143803 0
0.6312323 0.9626119 0.9969261 0.2980046 0
df_mod_all <- df_mod_pre_ferritin_pre_ld %>%
rename(predictions_pre_ferritin_pre_ld = .pred_1) %>%
left_join(
.,
df_mod_pre_ferritin_pre_ld_LD_regimen %>% select(upn, predictions_pre_ferritin_pre_ld_LD_regimen = .pred_1),
by = "upn"
) %>%
left_join(.,
df_mod_post_crp %>% select(upn, predictions_post_crp = .pred_1),
by = "upn") %>%
left_join(
.,
df_mod_post_d_dimer %>% select(upn, predictions_post_d_dimer = .pred_1),
by = "upn"
) %>%
left_join(
.,
df_mod_post_ferritin_only %>% select(upn, predictions_post_ferritin_only = .pred_1),
by = "upn"
) %>%
select(
upn,
icaht_grade_3_4,
predictions_pre_ferritin_pre_ld,
predictions_pre_ferritin_pre_ld_LD_regimen,
predictions_post_ferritin_only,
predictions_post_crp,
predictions_post_d_dimer
)
dcurves::dca(
icaht_grade_3_4 ~ predictions_pre_ferritin_pre_ld + predictions_post_ferritin_only,
data = df_mod_all,
thresholds = seq(0, 0.35, by = 0.01),
label = list(
predictions_pre_ferritin_pre_ld = "eIPMPre",
predictions_post_ferritin_only = "eIPMPost"
)
) %>%
plot(smooth = TRUE)
dcurves::dca(
icaht_grade_3_4 ~ predictions_pre_ferritin_pre_ld + predictions_pre_ferritin_pre_ld_LD_regimen +
predictions_post_ferritin_only + predictions_post_crp + predictions_post_d_dimer,
data = df_mod_all,
thresholds = seq(0, 0.35, by = 0.01),
label = list(
predictions_pre_ferritin_pre_ld = "eIPMPre",
predictions_pre_ferritin_pre_ld_LD_regimen = "eIPMPre (+ LD regimen)",
predictions_post_ferritin_only = "eIPMPost",
predictions_post_crp ="eIPMPost (+ day +3 CRP)",
predictions_post_d_dimer ="eIPMPost (+ day +3 D-dimer)"
)
) %>%
plot(smooth = TRUE)
p <- p_roc_pre_ferritin_pre_ld + p_roc_pre_ferritin_pre_ld_LD_regimen + p_roc_post_ferritin_only +
p_roc_post_crp + p_roc_post_d_dimer
p + plot_layout(ncol = 2) + plot_annotation(tag_levels = "A")
df %>%
filter(upn %in% df_train$upn) %>%
select(
cart_age,
gender_desc,
racenih,
ethnih,
prior_hct,
disease_cat,
anc_pre_ld,
hb_pre_ld,
plt_pre_ld,
LD_regimen_cat,
product_infused,
product_cat,
costim_domain
) %>%
tbl_summary(
missing_text = "Missing",
type = list(all_continuous() ~ "continuous2"),
statistic = list(
all_continuous() ~ c("{median} ({p25}, {p75})", "{min} - {max}"),
all_categorical() ~ "{n} ({p}%)"
),
digits = all_categorical() ~ 0,
sort = list(everything() ~ "frequency")
) %>%
bold_labels()
| Characteristic | N = 4831 |
|---|---|
| Age | |
| Median (IQR) | 62 (50, 68) |
| Range | 21 - 84 |
| Sex | |
| Male | 306 (63%) |
| Female | 177 (37%) |
| Race | |
| White | 413 (86%) |
| Asian | 35 (7%) |
| Black or African American | 13 (3%) |
| American Indian or Alaska Native | 11 (2%) |
| Multiple | 6 (1%) |
| Unknown | 3 (1%) |
| Native Hawaiian or other Pacific Islander | 2 (0%) |
| Ethnicity | |
| Not Hispanic/Latino or Unknown | 446 (92%) |
| Hispanic or Latino | 37 (8%) |
| Prior HCT | 161 (33%) |
| Disease | |
| Aggressive NHL | 216 (45%) |
| Indolent NHL | 110 (23%) |
| MM/PCL | 79 (16%) |
| ALL | 78 (16%) |
| Pre-LD ANC | |
| Median (IQR) | 2.86 (1.84, 4.22) |
| Range | 0.00 - 23.17 |
| Missing | 1 |
| Pre-LD Hb | |
| Median (IQR) | 10.70 (9.50, 12.25) |
| Range | 7.30 - 17.10 |
| Pre-LD platelet count | |
| Median (IQR) | 146 (94, 203) |
| Range | 6 - 790 |
| Lymphodepletion regimen | |
| Low-intensity Cy/Flu | 284 (59%) |
| High-intensity Cy/Flu | 175 (36%) |
| Other | 24 (5%) |
| CAR T-cell product | |
| JCAR014 | 134 (28%) |
| YESCARTA | 94 (19%) |
| BREYANZI | 64 (13%) |
| CARVYKTI | 35 (7%) |
| Investigational CD20 product | 34 (7%) |
| JCAR021 | 33 (7%) |
| TECARTUS | 32 (7%) |
| ABECMA | 22 (5%) |
| Investigational BCMA product | 22 (5%) |
| KYMRIAH | 13 (3%) |
| CAR T-cell product category | |
| Investigational CAR T-cell product | 223 (46%) |
| Commercial CD19 CAR T-cell product with CD28 costimulatory domain (axi-cel, brexu-cel) | 126 (26%) |
| Commercial CD19 CAR T-cell product with 4-1BB costimulatory domain (liso-cel, tisa-cel) | 77 (16%) |
| Commercial BCMA CAR T-cell product (cilta-cel, ide-cel) | 57 (12%) |
| Costimulatory domain | |
| 4-1BB | 323 (67%) |
| CD28 | 126 (26%) |
| CD28-41BB | 34 (7%) |
| 1 n (%) | |
df %>%
filter(upn %in% df_test$upn) %>%
select(
cart_age,
gender_desc,
racenih,
ethnih,
prior_hct,
disease_cat,
anc_pre_ld,
hb_pre_ld,
plt_pre_ld,
LD_regimen_cat,
product_infused,
product_cat,
costim_domain
) %>%
tbl_summary(
missing_text = "Missing",
type = list(all_continuous() ~ "continuous2"),
statistic = list(
all_continuous() ~ c("{median} ({p25}, {p75})", "{min} - {max}"),
all_categorical() ~ "{n} ({p}%)"
),
digits = all_categorical() ~ 0,
sort = list(everything() ~ "frequency")
) %>%
bold_labels()
| Characteristic | N = 2081 |
|---|---|
| Age | |
| Median (IQR) | 61 (53, 67) |
| Range | 19 - 79 |
| Sex | |
| Male | 131 (63%) |
| Female | 77 (37%) |
| Race | |
| White | 170 (82%) |
| Asian | 23 (11%) |
| Black or African American | 5 (2%) |
| Unknown | 4 (2%) |
| American Indian or Alaska Native | 2 (1%) |
| Multiple | 2 (1%) |
| Native Hawaiian or other Pacific Islander | 2 (1%) |
| Ethnicity | |
| Not Hispanic/Latino or Unknown | 193 (93%) |
| Hispanic or Latino | 15 (7%) |
| Prior HCT | 73 (35%) |
| Disease | |
| Aggressive NHL | 102 (49%) |
| Indolent NHL | 44 (21%) |
| MM/PCL | 41 (20%) |
| ALL | 21 (10%) |
| Pre-LD ANC | |
| Median (IQR) | 2.77 (1.58, 4.60) |
| Range | 0.02 - 54.03 |
| Pre-LD Hb | |
| Median (IQR) | 10.70 (9.60, 12.42) |
| Range | 7.00 - 15.60 |
| Pre-LD platelet count | |
| Median (IQR) | 139 (82, 201) |
| Range | 7 - 453 |
| Lymphodepletion regimen | |
| Low-intensity Cy/Flu | 119 (57%) |
| High-intensity Cy/Flu | 78 (38%) |
| Other | 11 (5%) |
| CAR T-cell product | |
| JCAR014 | 59 (28%) |
| YESCARTA | 46 (22%) |
| BREYANZI | 23 (11%) |
| CARVYKTI | 17 (8%) |
| Investigational BCMA product | 16 (8%) |
| TECARTUS | 15 (7%) |
| Investigational CD20 product | 11 (5%) |
| JCAR021 | 11 (5%) |
| ABECMA | 8 (4%) |
| KYMRIAH | 2 (1%) |
| CAR T-cell product category | |
| Investigational CAR T-cell product | 97 (47%) |
| Commercial CD19 CAR T-cell product with CD28 costimulatory domain (axi-cel, brexu-cel) | 61 (29%) |
| Commercial BCMA CAR T-cell product (cilta-cel, ide-cel) | 25 (12%) |
| Commercial CD19 CAR T-cell product with 4-1BB costimulatory domain (liso-cel, tisa-cel) | 25 (12%) |
| Costimulatory domain | |
| 4-1BB | 136 (65%) |
| CD28 | 61 (29%) |
| CD28-41BB | 11 (5%) |
| 1 n (%) | |
df %>%
filter(upn %in% df_train$upn) %>%
plyr::summarize(
anc_pre_ld_5p = quantile(anc_pre_ld, 0.05, na.rm = TRUE),
plt_pre_ld_5p = quantile(plt_pre_ld, 0.05, na.rm = TRUE),
ldh_pre_ld_5p = quantile(ldh_pre_ld, 0.05, na.rm = TRUE),
ferritin_pre_ld_5p = quantile(ferritin_pre_ld, 0.05, na.rm = TRUE),
ferritin_day_3_5p = quantile(ferritin_day_3, 0.05, na.rm = TRUE)
)
anc_pre_ld_5p plt_pre_ld_5p ldh_pre_ld_5p ferritin_pre_ld_5p
1 0.71 24.1 114.35 27.1
ferritin_day_3_5p
1 68
df %>%
filter(upn %in% df_train$upn) %>%
plyr::summarize(
anc_pre_ld_95p = quantile(anc_pre_ld, 0.95, na.rm = TRUE),
plt_pre_ld_95p = quantile(plt_pre_ld, 0.95, na.rm = TRUE),
ldh_pre_ld_95p = quantile(ldh_pre_ld, 0.95, na.rm = TRUE),
ferritin_pre_ld_95p = quantile(ferritin_pre_ld, 0.95, na.rm = TRUE),
ferritin_day_3_95p = quantile(ferritin_day_3, 0.95, na.rm = TRUE)
)
anc_pre_ld_95p plt_pre_ld_95p ldh_pre_ld_95p ferritin_pre_ld_95p
1 7.9815 287.4 644.75 3904.1
ferritin_day_3_95p
1 4931.45
devtools::session_info()
─ Session info ─────────────────────────────────────────────────────
setting value
version R version 4.3.3 (2024-02-29)
os macOS Sonoma 14.5
system aarch64, darwin20
ui X11
language (EN)
collate en_US.UTF-8
ctype en_US.UTF-8
tz America/Los_Angeles
date 2024-07-02
pandoc 3.1.11 @ /Applications/RStudio.app/Contents/Resources/app/quarto/bin/tools/aarch64/ (via rmarkdown)
─ Packages ─────────────────────────────────────────────────────────
package * version date (UTC) lib source
backports 1.4.1 2021-12-13 [1] CRAN (R 4.3.0)
base64enc 0.1-3 2015-07-28 [1] CRAN (R 4.3.0)
broom * 1.0.5 2023-06-09 [1] CRAN (R 4.3.0)
broom.helpers 1.14.0 2023-08-07 [1] CRAN (R 4.3.0)
bslib 0.7.0 2024-03-29 [1] CRAN (R 4.3.1)
cachem 1.0.8 2023-05-01 [1] CRAN (R 4.3.0)
caret * 6.0-94 2023-03-21 [1] CRAN (R 4.3.0)
checkmate 2.3.1 2023-12-04 [1] CRAN (R 4.3.1)
class 7.3-22 2023-05-03 [1] CRAN (R 4.3.3)
cli 3.6.2 2023-12-11 [1] CRAN (R 4.3.1)
cluster 2.1.6 2023-12-01 [1] CRAN (R 4.3.3)
codetools 0.2-19 2023-02-01 [1] CRAN (R 4.3.3)
colorspace 2.1-0 2023-01-23 [1] CRAN (R 4.3.0)
commonmark 1.9.1 2024-01-30 [1] CRAN (R 4.3.1)
crosstalk 1.2.1 2023-11-23 [1] CRAN (R 4.3.1)
cutpointr * 1.1.2 2022-04-13 [1] CRAN (R 4.3.0)
data.table 1.15.4 2024-03-30 [1] CRAN (R 4.3.1)
dcurves * 0.4.0 2022-12-23 [1] CRAN (R 4.3.0)
devtools 2.4.5 2022-10-11 [1] CRAN (R 4.3.0)
dials * 1.2.1 2024-02-22 [1] CRAN (R 4.3.1)
DiceDesign 1.10 2023-12-07 [1] CRAN (R 4.3.1)
digest 0.6.35 2024-03-11 [1] CRAN (R 4.3.1)
distill * 1.6 2023-10-06 [1] CRAN (R 4.3.1)
doParallel 1.0.17 2022-02-07 [1] CRAN (R 4.3.0)
downlit 0.4.3 2023-06-29 [1] CRAN (R 4.3.0)
dplyr * 1.1.4 2023-11-17 [1] CRAN (R 4.3.1)
e1071 1.7-14 2023-12-06 [1] CRAN (R 4.3.1)
ellipsis 0.3.2 2021-04-29 [1] CRAN (R 4.3.0)
evaluate 0.23 2023-11-01 [1] CRAN (R 4.3.1)
fansi 1.0.6 2023-12-08 [1] CRAN (R 4.3.1)
farver 2.1.1 2022-07-06 [1] CRAN (R 4.3.0)
fastmap 1.1.1 2023-02-24 [1] CRAN (R 4.3.0)
forcats * 1.0.0 2023-01-29 [1] CRAN (R 4.3.0)
foreach 1.5.2 2022-02-02 [1] CRAN (R 4.3.0)
foreign 0.8-86 2023-11-28 [1] CRAN (R 4.3.3)
Formula 1.2-5 2023-02-24 [1] CRAN (R 4.3.0)
fs 1.6.4 2024-04-25 [1] CRAN (R 4.3.1)
furrr 0.3.1 2022-08-15 [1] CRAN (R 4.3.0)
future 1.33.2 2024-03-26 [1] CRAN (R 4.3.1)
future.apply 1.11.2 2024-03-28 [1] CRAN (R 4.3.1)
generics 0.1.3 2022-07-05 [1] CRAN (R 4.3.0)
ggeasy 0.1.4 2023-03-12 [1] CRAN (R 4.3.0)
ggplot2 * 3.5.0 2024-02-23 [1] CRAN (R 4.3.1)
ggrepel * 0.9.5 2024-01-10 [1] CRAN (R 4.3.1)
ggsurvfit * 1.0.0 2023-10-31 [1] CRAN (R 4.3.1)
glmnet * 4.1-8 2023-08-22 [1] CRAN (R 4.3.0)
globals 0.16.3 2024-03-08 [1] CRAN (R 4.3.1)
glue 1.7.0 2024-01-09 [1] CRAN (R 4.3.1)
gower 1.0.1 2022-12-22 [1] CRAN (R 4.3.0)
GPfit 1.0-8 2019-02-08 [1] CRAN (R 4.3.0)
gridExtra 2.3 2017-09-09 [1] CRAN (R 4.3.0)
gt 0.10.1 2024-01-17 [1] CRAN (R 4.3.1)
gtable 0.3.4 2023-08-21 [1] CRAN (R 4.3.0)
gtsummary * 1.7.2 2023-07-15 [1] CRAN (R 4.3.0)
hardhat 1.3.1 2024-02-02 [1] CRAN (R 4.3.1)
haven 2.5.4 2023-11-30 [1] CRAN (R 4.3.1)
heatwaveR 0.4.6 2021-10-27 [1] CRAN (R 4.3.0)
here * 1.0.1 2020-12-13 [1] CRAN (R 4.3.0)
highr 0.10 2022-12-22 [1] CRAN (R 4.3.0)
Hmisc * 5.1-2 2024-03-11 [1] CRAN (R 4.3.1)
hms 1.1.3 2023-03-21 [1] CRAN (R 4.3.0)
htmlTable 2.4.2 2023-10-29 [1] CRAN (R 4.3.1)
htmltools 0.5.8.1 2024-04-04 [1] CRAN (R 4.3.1)
htmlwidgets 1.6.4 2023-12-06 [1] CRAN (R 4.3.1)
httpuv 1.6.15 2024-03-26 [1] CRAN (R 4.3.1)
httr 1.4.7 2023-08-15 [1] CRAN (R 4.3.0)
infer * 1.0.7 2024-03-25 [1] CRAN (R 4.3.1)
ipred 0.9-14 2023-03-09 [1] CRAN (R 4.3.0)
iterators 1.0.14 2022-02-05 [1] CRAN (R 4.3.0)
janitor * 2.2.0 2023-02-02 [1] CRAN (R 4.3.0)
jquerylib 0.1.4 2021-04-26 [1] CRAN (R 4.3.0)
jsonlite 1.8.8 2023-12-04 [1] CRAN (R 4.3.1)
knitr * 1.45 2023-10-30 [1] CRAN (R 4.3.1)
labeling 0.4.3 2023-08-29 [1] CRAN (R 4.3.0)
labelled * 2.12.0 2023-06-21 [1] CRAN (R 4.3.0)
later 1.3.2 2023-12-06 [1] CRAN (R 4.3.1)
lattice * 0.22-5 2023-10-24 [1] CRAN (R 4.3.3)
lava 1.8.0 2024-03-05 [1] CRAN (R 4.3.1)
lazyeval 0.2.2 2019-03-15 [1] CRAN (R 4.3.0)
lhs 1.1.6 2022-12-17 [1] CRAN (R 4.3.0)
lifecycle 1.0.4 2023-11-07 [1] CRAN (R 4.3.1)
listenv 0.9.1 2024-01-29 [1] CRAN (R 4.3.1)
lubridate * 1.9.3 2023-09-27 [1] CRAN (R 4.3.1)
magrittr 2.0.3 2022-03-30 [1] CRAN (R 4.3.0)
markdown 1.12 2023-12-06 [1] CRAN (R 4.3.1)
MASS 7.3-60.0.1 2024-01-13 [1] CRAN (R 4.3.3)
Matrix * 1.6-5 2024-01-11 [1] CRAN (R 4.3.3)
MatrixModels 0.5-3 2023-11-06 [1] CRAN (R 4.3.1)
memoise 2.0.1 2021-11-26 [1] CRAN (R 4.3.0)
mgcv 1.9-1 2023-12-21 [1] CRAN (R 4.3.3)
mime 0.12 2021-09-28 [1] CRAN (R 4.3.0)
miniUI 0.1.1.1 2018-05-18 [1] CRAN (R 4.3.0)
modeldata * 1.3.0 2024-01-21 [1] CRAN (R 4.3.1)
ModelMetrics 1.2.2.2 2020-03-17 [1] CRAN (R 4.3.0)
multcomp 1.4-25 2023-06-20 [1] CRAN (R 4.3.0)
munsell 0.5.0 2018-06-12 [1] CRAN (R 4.3.0)
mvtnorm 1.2-4 2023-11-27 [1] CRAN (R 4.3.1)
nlme 3.1-164 2023-11-27 [1] CRAN (R 4.3.3)
nnet 7.3-19 2023-05-03 [1] CRAN (R 4.3.3)
openxlsx * 4.2.5.2 2023-02-06 [1] CRAN (R 4.3.0)
parallelly 1.37.1 2024-02-29 [1] CRAN (R 4.3.1)
parsnip * 1.2.1 2024-03-22 [1] CRAN (R 4.3.1)
patchwork * 1.2.0 2024-01-08 [1] CRAN (R 4.3.1)
pillar 1.9.0 2023-03-22 [1] CRAN (R 4.3.0)
pkgbuild 1.4.4 2024-03-17 [1] CRAN (R 4.3.1)
pkgconfig 2.0.3 2019-09-22 [1] CRAN (R 4.3.0)
pkgload 1.3.4 2024-01-16 [1] CRAN (R 4.3.1)
plotly * 4.10.4 2024-01-13 [1] CRAN (R 4.3.1)
plyr 1.8.9 2023-10-02 [1] CRAN (R 4.3.1)
pmsampsize * 1.1.3 2023-12-06 [1] CRAN (R 4.3.1)
polspline 1.1.24 2023-10-26 [1] CRAN (R 4.3.1)
probably * 1.0.3 2024-02-23 [1] CRAN (R 4.3.1)
pROC 1.18.5 2023-11-01 [1] CRAN (R 4.3.1)
prodlim * 2023.08.28 2023-08-28 [1] CRAN (R 4.3.0)
profvis 0.3.8 2023-05-02 [1] CRAN (R 4.3.0)
promises 1.2.1 2023-08-10 [1] CRAN (R 4.3.0)
proxy 0.4-27 2022-06-09 [1] CRAN (R 4.3.0)
purrr * 1.0.2 2023-08-10 [1] CRAN (R 4.3.0)
quantreg 5.97 2023-08-19 [1] CRAN (R 4.3.0)
R6 2.5.1 2021-08-19 [1] CRAN (R 4.3.0)
Rcpp 1.0.12 2024-01-09 [1] CRAN (R 4.3.1)
readr * 2.1.5 2024-01-10 [1] CRAN (R 4.3.1)
recipes * 1.0.10 2024-02-18 [1] CRAN (R 4.3.1)
remotes 2.5.0 2024-03-17 [1] CRAN (R 4.3.1)
reshape2 1.4.4 2020-04-09 [1] CRAN (R 4.3.0)
rlang 1.1.3 2024-01-10 [1] CRAN (R 4.3.1)
rmarkdown * 2.26 2024-03-05 [1] CRAN (R 4.3.1)
rms * 6.8-0 2024-03-11 [1] CRAN (R 4.3.1)
rpart 4.1.23 2023-12-05 [1] CRAN (R 4.3.3)
rprojroot 2.0.4 2023-11-05 [1] CRAN (R 4.3.1)
rsample * 1.2.1 2024-03-25 [1] CRAN (R 4.3.1)
rstudioapi 0.16.0 2024-03-24 [1] CRAN (R 4.3.1)
runway * 0.1.0 2024-04-02 [1] Github (ML4LHS/runway@e93d55e)
sandwich 3.1-0 2023-12-11 [1] CRAN (R 4.3.1)
sass 0.4.9 2024-03-15 [1] CRAN (R 4.3.1)
scales * 1.3.0 2023-11-28 [1] CRAN (R 4.3.1)
sessioninfo 1.2.2 2021-12-06 [1] CRAN (R 4.3.0)
shape 1.4.6.1 2024-02-23 [1] CRAN (R 4.3.1)
shiny 1.8.1.1 2024-04-02 [1] CRAN (R 4.3.1)
snakecase 0.11.1 2023-08-27 [1] CRAN (R 4.3.0)
SparseM 1.81 2021-02-18 [1] CRAN (R 4.3.0)
stringi 1.8.3 2023-12-11 [1] CRAN (R 4.3.1)
stringr * 1.5.1 2023-11-14 [1] CRAN (R 4.3.1)
survival 3.5-8 2024-02-14 [1] CRAN (R 4.3.3)
TH.data 1.1-2 2023-04-17 [1] CRAN (R 4.3.0)
tibble * 3.2.1 2023-03-20 [1] CRAN (R 4.3.0)
tidymodels * 1.2.0 2024-03-25 [1] CRAN (R 4.3.1)
tidyr * 1.3.1 2024-01-24 [1] CRAN (R 4.3.1)
tidyselect 1.2.1 2024-03-11 [1] CRAN (R 4.3.1)
tidyverse * 2.0.0 2023-02-22 [1] CRAN (R 4.3.0)
timechange 0.3.0 2024-01-18 [1] CRAN (R 4.3.1)
timeDate 4032.109 2023-12-14 [1] CRAN (R 4.3.1)
tune * 1.2.0 2024-03-20 [1] CRAN (R 4.3.1)
tzdb 0.4.0 2023-05-12 [1] CRAN (R 4.3.0)
urlchecker 1.0.1 2021-11-30 [1] CRAN (R 4.3.0)
usethis 2.2.3 2024-02-19 [1] CRAN (R 4.3.1)
utf8 1.2.4 2023-10-22 [1] CRAN (R 4.3.1)
vctrs 0.6.5 2023-12-01 [1] CRAN (R 4.3.1)
vip * 0.4.1 2023-08-21 [1] CRAN (R 4.3.0)
viridisLite 0.4.2 2023-05-02 [1] CRAN (R 4.3.0)
withr 3.0.0 2024-01-16 [1] CRAN (R 4.3.1)
workflows * 1.1.4 2024-02-19 [1] CRAN (R 4.3.1)
workflowsets * 1.1.0 2024-03-21 [1] CRAN (R 4.3.1)
xfun 0.43 2024-03-25 [1] CRAN (R 4.3.1)
xml2 1.3.6 2023-12-04 [1] CRAN (R 4.3.1)
xtable 1.8-4 2019-04-21 [1] CRAN (R 4.3.0)
yaml 2.3.8 2023-12-11 [1] CRAN (R 4.3.1)
yardstick * 1.3.1 2024-03-21 [1] CRAN (R 4.3.1)
zip 2.3.1 2024-01-27 [1] CRAN (R 4.3.1)
zoo * 1.8-12 2023-04-13 [1] CRAN (R 4.3.0)
[1] /Library/Frameworks/R.framework/Versions/4.3-arm64/Resources/library
────────────────────────────────────────────────────────────────────